In [1]:
import spacy
nlp = spacy.load('en')

In [2]:
doc = nlp(u'I am learning how to build chatbots')

In [3]:
for token in doc:
    print(token.text, token.pos_)

I PRON
am VERB
learning VERB
how ADV
to PART
build VERB
chatbots NOUN


In [4]:
doc = nlp(u'Google release "Move Mirror" AI experiment that matches your pose from 80,000 images')
for token in doc:
    print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
        token.shape_, token.is_alpha, token.is_stop)

Google Google PROPN NNP compound Xxxxx True False
release release NOUN NN ROOT xxxx True False
" " PUNCT `` punct " False False
Move Move PROPN NNP nmod Xxxx True True
Mirror Mirror PROPN NNP nmod Xxxxx True False
" " PUNCT '' punct " False False
AI AI PROPN NNP compound XX True False
experiment experiment NOUN NN appos xxxx True False
that that DET WDT nsubj xxxx True True
matches match VERB VBZ relcl xxxx True False
your -PRON- DET PRP$ poss xxxx True True
pose pose NOUN NN dobj xxxx True False
from from ADP IN prep xxxx True True
80,000 80,000 NUM CD nummod dd,ddd False False
images image NOUN NNS pobj xxxx True False


In [5]:
from spacy.lemmatizer import Lemmatizer
from spacy.lang.en import LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES
lemmatizer = Lemmatizer(LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES)
lemmatizer('chuckles', 'NOUN') # 2nd param is token's part-of-speech tag

['chuckle']

In [6]:
lemmatizer('blazing', 'VERB')

['blaze']

In [7]:
lemmatizer('fastest', 'ADJ')

['fast']

In [8]:
my_string = u"Google has its headquarters in Mountain View, California having revenue amounted to 109.65 billion US dollars"
doc = nlp(my_string)
for ent in doc.ents:
    print(ent.text, ent.label_)

Google ORG
Mountain View GPE
California GPE
109.65 billion US dollars MONEY


In [9]:
my_string = u"Mark Zuckerberg born May 14, 1984 in New York is an American technology entrepreneur and philanthropist best known for co-founding and leading Facebook as its chairman and CEO."
doc = nlp(my_string)
for ent in doc.ents:
    print(ent.text, ent.label_)

Mark Zuckerberg PERSON
May 14, 1984 DATE
New York GPE
American NORP
Facebook PERSON


In [10]:
my_string1 = u"Imagine Dragons are the best band."
my_string2 = u"Imagine dragons come and take over the city."
doc1 = nlp(my_string1)
doc2 = nlp(my_string2)
for ent in doc1.ents:
    print(ent.text, ent.label_)


In [11]:
from spacy.lang.en.stop_words import STOP_WORDS
print(STOP_WORDS)

{'beforehand', 'else', 'him', 'whereas', 'has', 'himself', 'thence', 'serious', 'made', 'namely', 'last', 'nine', 'whose', 'forty', 'give', 'former', 'latterly', 'put', 'anyway', 'keep', 'can', 'how', 'our', 'somehow', 'sixty', 'we', '’s', 'at', 'hereby', 'regarding', 'he', 'whenever', 'no', 'empty', 'therefore', 'various', 'move', 'although', 'side', 'to', 'why', 'not', 'everything', 'others', 'that', 'less', 'per', 'there', 'they', 'may', 'something', 'n‘t', 'sometimes', 'thru', 'about', 'one', 'other', 'except', 'say', 'between', 'three', 'whole', 'does', 'none', 'enough', 'eleven', 'onto', 'across', 'was', 'anyone', 'am', 'too', 'several', 'yourselves', 'do', 'are', 'with', 'as', 'five', 'herein', 'whereupon', 'these', 'n’t', 'such', 'nobody', 'much', 'if', 'should', 'than', '‘m', 'whence', 'both', 'front', 'also', 'through', 'themselves', 'hundred', 'thereby', 'up', 'please', 'two', 'then', 'ourselves', 'anyhow', 'down', 'thus', 'moreover', 'some', 'hers', 'back', 'yours', 'by', '

In [12]:
len(STOP_WORDS)

326

In [13]:
doc = nlp(u'Book me a flight from Bangalore to Goa')
blr, goa = doc[5], doc[7]
list(blr.ancestors)

[from, flight, Book]

In [14]:
list(goa.ancestors)

[to, flight, Book]

In [15]:
doc = nlp(u'Book a table at the restaurant and the taxi to the hotel')
tasks = doc[2], doc[8] #(table, taxi)
tasks_target = doc[5], doc[11] #(restaurant, hotel)
for task in tasks_target:
    for tok in task.ancestors:
        if tok in tasks:
            print("Booking of {} belongs to {}".format(tok, task))
#     break

Booking of table belongs to restaurant
Booking of table belongs to hotel


In [16]:
from spacy import displacy
doc = nlp(u'Book a table at the restaurant and the taxi to the hotel')
displacy.render(doc, style='dep')

In [19]:
doc = nlp(u"What are some places to visit in Berlin and stay in Lubeck")
places = [doc[7], doc[11]] #[Berlin, Lubeck]
actions = [doc[5], doc[9]] #[visit, stay]
for place in places:
    for tok in place.ancestors:
        if tok in actions:
            print("User is referring {} to {}".format(place, tok))
            break
            

User is referring Berlin to visit
User is referring Lubeck to stay


In [21]:
doc = nlp(u"Boston Dynamics is gearing up to produce thousands of robot dogs")
list(doc.noun_chunks)

[Boston Dynamics, thousands, robot dogs]

In [22]:
doc = nlp(u"Deep learning cracks the code of messenger RNAs and protein-­ coding potential")
for chunk in doc.noun_chunks:
    print(chunk.text, chunk.root.text, chunk.root.dep_,
          chunk.root.head.text)

Deep learning learning nsubj cracks
the code code dobj cracks
messenger RNAs RNAs pobj of
protein-­ coding potential potential conj code
