In [1]:
import spacy

In [2]:
# Load the small English pipeline once; later cells reuse this `nlp` object.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Calling the pipeline on a string returns a Doc: the text split into tokens
# that carry linguistic annotations.
doc = nlp("Umair Ansari is going to be rich soon.")

In [4]:
# The Doc still exposes the original input string through .text
doc.text

'Umair Ansari is going to be rich soon.'

In [5]:
# For each token print: its text, the integer part-of-speech ID, the readable
# part-of-speech tag (proper noun, verb, adverb, ...) and its dependency label.
for tok in doc:
    print(tok.text, tok.pos, tok.pos_, tok.dep_)
    

Umair 96 PROPN compound
Ansari 96 PROPN nsubj
is 100 VERB aux
going 100 VERB ROOT
to 94 PART aux
be 100 VERB xcomp
rich 84 ADJ acomp
soon 86 ADV advmod
. 97 PUNCT punct


In [6]:
# Show the (name, component) pairs that make up the loaded pipeline.
nlp.pipeline

[('tagger', <spacy.pipeline.pipes.Tagger at 0x97ba54860>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x97bbb0588>),
 ('ner', <spacy.pipeline.pipes.EntityRecognizer at 0x97bbb05e8>)]

In [7]:
# Same pipeline, but only the component names.
nlp.pipe_names

['tagger', 'parser', 'ner']

In [8]:
from spacy import displacy

In [9]:
# Sample sentence containing a contraction and an abbreviation with periods.
mystring = "we're moving to L.A.!"

In [10]:
# Run the sample sentence through the pipeline (rebinds `doc`).
doc = nlp(mystring)

In [11]:
# Tokenization: print every token followed by "|" so the boundaries are visible.
for tok in doc:
    print(tok.text, end="|")

we|'re|moving|to|L.A.|!|

In [12]:
# A harder example: contraction, hyphenated word, e-mail address and URL.
doc2 = nlp(
    "We're here to help! Send snail-mail, email support@oursite.com "
    "or visit us at http://www.oursite.com!"
)


In [13]:
# Displaying a Doc shows its text.
doc2

We're here to help! Send snail-mail, email support@oursite.com or visit us at http://www.oursite.com!

In [14]:
# One token per line: the e-mail address and the URL stay whole, while
# "snail-mail" is split around the hyphen.
for tok in doc2:
    print(tok)

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
http://www.oursite.com
!


In [15]:
# The tokenizer also separates units and currency symbols from their numbers
# ("5" / "km", "$" / "10.30").
doc3 = nlp("A 5km NYC cab ride costs $10.30")

for tok in doc3:
    print(tok)

A
5
km
NYC
cab
ride
costs
$
10.30


In [18]:
# Abbreviations such as "St." and "U.S." are kept as single tokens.
doc4 = nlp("Let's visit St. Louis in the U.S. next year.")

for tok in doc4:
    print(tok, end="|")

Let|'s|visit|St.|Louis|in|the|U.S.|next|year|.|

In [17]:
# Number of tokens in doc4.
len(doc4)

11

In [19]:
# Size of the vocabulary shared by the pipeline and every Doc it creates
# (read through `nlp` so the cell does not depend on which `doc` is bound).
# NOTE(review): the recorded result for this small model is 517, not ~58k; the
# count presumably covers only the lexemes loaded so far - confirm against the
# installed spaCy version.
len(nlp.vocab)


517

In [22]:
doc5 = nlp("My name is Umair Ansari.")

# Indexing retrieves a single token (index 2 is the 3rd token). It is printed
# explicitly because a notebook only displays a cell's last expression.
print(doc5[2])

# Slicing retrieves a run of consecutive tokens (tokens 1 to 3).
doc5[1:4]

name is Umair

In [23]:
# Two parallel sentences that differ in the possessive and the adjective.
doc6 = nlp("My dinner was horrible.")
doc7 = nlp("Your dinner was delicious.")

In [24]:
# A Doc does not support item assignment, so tokens cannot be swapped in place.
# The attempted swap is "horrible" (doc6[3]) for "delicious" (doc7[3]); the
# expected error is caught and shown so the cell no longer aborts Run All.
try:
    doc6[3] = doc7[3]
except TypeError as err:
    print("TypeError:", err)

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

In [25]:
# Sentence with an organisation, a place and a money amount for entity lookup.
doc8 = nlp("Apple to build a Hong Kong factory for $6 million")

In [28]:
# Named entities: print "text-LABEL-explanation" for each entity found.
for ent in doc8.ents:
    label_meaning = str(spacy.explain(ent.label_))
    print("-".join([ent.text, ent.label_, label_meaning]))
    

Apple-ORG-Companies, agencies, institutions, etc.
Hong Kong-GPE-Countries, cities, states
$6 million-MONEY-Monetary values, including unit


In [29]:
# Example sentence for noun-chunk extraction.
doc9 = nlp("Autonomous cars shift insurance liability toward manufacturers.")


In [30]:
# Print each noun chunk of the sentence on its own line.
for chunk in doc9.noun_chunks:
    print(chunk.text)

Autonomous cars
insurance liability
manufacturers


In [31]:
# Visualization: parse the sentence that the displaCy cells below draw
# (rebinds `doc`).
doc = nlp("Apple is going to build a U.K. factory for $6 million.")

In [33]:
# Draw the dependency parse inline; "distance" is passed to displaCy as a
# layout option.
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"distance": 80},
)

In [34]:
# Same sentence, this time rendered with the named-entity visualizer.
displacy.render(doc, style="ent", jupyter=True)

In [None]:
# displacy.serve starts a web server and blocks this cell until it is
# interrupted, so a Restart & Run All stops here. Inside Jupyter,
# displacy.render(..., jupyter=True) shows the same parse inline.
# Bind to loopback only instead of every network interface (the recorded run
# served on 0.0.0.0).
displacy.serve(doc, style="dep", host="127.0.0.1")

  "__main__", mod_spec)



Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...



127.0.0.1 - - [28/Aug/2019 23:09:23] "GET / HTTP/1.1" 200 10066
127.0.0.1 - - [28/Aug/2019 23:09:24] "GET /favicon.ico HTTP/1.1" 200 10066
