## Part-of-Speech Tagging:

In [1]:
import spacy

In [2]:
# Load the `en_core_web_sm` pipeline once; the cells below all reuse this `nlp` object.
nlp = spacy.load('en_core_web_sm')

In [18]:
# Run the pipeline on a sample sentence; `doc` holds the resulting tokens.
doc = nlp(u"The quick brown fox jumped over the lazy dog")

In [4]:
# `.text` gives back the sentence as a plain string.
print(doc.text)

The quick brown fox jumped over the lazy dog


In [5]:
# A doc can be indexed by position; index 5 is the sixth token.
print(doc[5])

over


In [6]:
# Index 4 is the fifth token.
print(doc[4])

jumped


In [7]:
# Each token exposes attributes; here its raw text and its coarse-grained POS label.

print(doc[5].text)
print(doc[5].pos_)

over
ADP


In [8]:
print(doc[5].tag_)  # reports the fine-grained tag of the token

IN


In [9]:
# Fine-grained tag of the token at index 6.
print(doc[6].tag_)

DT


In [10]:
print(doc[5].tag) # without the trailing underscore, `tag` reports
# the numerical ID corresponding to that tag

1292078113972184607


In [11]:
# The same holds for POS: `pos` (no underscore) is the numerical ID.
print(doc[5].pos)

84


In [12]:
# Numerical POS ID of the token at index 4.
print(doc[4].pos)

99


In [19]:
# Tabulate every token: text, coarse POS, fine-grained tag, and the
# description of that tag returned by spacy.explain().
# The first three columns are padded to a width of 10 characters.
for token in doc:
    tag_meaning = spacy.explain(token.tag_)
    print(f"{token.text:10} {token.pos_:10} {token.tag_:10} {tag_meaning}")

The        DET        DT         determiner
quick      ADJ        JJ         adjective
brown      ADJ        JJ         adjective
fox        NOUN       NN         noun, singular or mass
jumped     VERB       VBD        verb, past tense
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner
lazy       ADJ        JJ         adjective
dog        NOUN       NN         noun, singular or mass


In [20]:
# cool :)

In [21]:
doc1 = nlp(u"I read books on NLP")

# Keep the token at index 1 so its tags can be inspected below.
word = doc1[1]

In [23]:
# Printing a token shows its text.
print(word)

read


In [24]:
# Check what kind of object indexing a doc returns.
type(word)

spacy.tokens.token.Token

In [26]:
# Show the selected token's text, coarse POS, fine-grained tag and tag description,
# with the first three columns padded to a width of 10 characters.
token = word
tag_meaning = spacy.explain(token.tag_)
print(f"{token.text:10} {token.pos_:10} {token.tag_:10} {tag_meaning}")

read       VERB       VBP        verb, non-3rd person singular present


In [30]:
# Another example: the same word in a different sentence.
doc2 = nlp(u"I read a book on NLP")

# Take the token at index 1 and print its text, POS, tag and tag description.
token = word = doc2[1]
tag_meaning = spacy.explain(token.tag_)
print(f"{token.text:10} {token.pos_:10} {token.tag_:10} {tag_meaning}")


read       VERB       VBD        verb, past tense


In [31]:
# In the two examples above, notice that spaCy is smart enough to pick up the contextual
# difference between the sentences. The word is spelled exactly the same in both cases, but because
# of the surrounding context spaCy assigns it a different tag and explanation in each!

In [34]:
doc = nlp(u"The quick brown fox jumped over the lazy dog")

# Count the tokens per POS value; the result is keyed by the POS numerical ID.
POS_counts = doc.count_by(spacy.attrs.POS)

print(POS_counts) # reports the frequency of each POS numerical ID in the doc

# Now let's go ahead and look up the actual POS label for each numerical ID

{83: 3, 99: 1, 84: 1, 89: 2, 91: 2}


In [37]:
# The string label of a single token is available via `pos_`.
doc[3].pos_

'NOUN'

In [43]:
# Walk the counts in ascending ID order and look each ID up in the doc's
# vocabulary to print its text next to the frequency.
for pos_id, count in sorted(POS_counts.items()):
    pos_label = doc.vocab[pos_id].text
    print(f"{pos_id}. {pos_label:5} {count}")
    

83. ADJ   3
84. ADP   1
89. DET   2
91. NOUN  2
99. VERB  1


In [45]:
# Repeat the frequency count for the fine-grained tags.
TAG_counts = doc.count_by(spacy.attrs.TAG)

# Print each tag ID, its text from the vocabulary, and how often it occurs.
for tag_id, count in sorted(TAG_counts.items()):
    tag_label = doc.vocab[tag_id].text
    print(f"{tag_id}. {tag_label:5} {count}")

1292078113972184607. IN    1
10554686591937588953. JJ    3
15267657372422890137. DT    2
15308085513773655218. NN    2
17109001835818727656. VBD   1


In [51]:
# Finally, count the syntactic dependency labels in the doc.

DEP_counts = doc.count_by(spacy.attrs.DEP)

# Print each dependency ID, its text from the vocabulary, and how often it occurs.
for dep_id, count in sorted(DEP_counts.items()):
    dep_label = doc.vocab[dep_id].text
    print(f"{dep_id}. {dep_label:5} {count}")

# Note: some labels are keyed by a much larger numerical ID than the others.

399. amod  3
412. det   2
426. nsubj 1
436. pobj  1
440. prep  1
8206900633647566924. ROOT  1


## Visualization (POS Tagging):

In [52]:
from spacy import displacy

In [53]:
# Build the sample doc used by the visualizations below.
doc = nlp(u"The quick brown fox jumped over the lazy dog")

In [54]:
# Render the doc with the 'dep' visualizer, displayed inside the notebook (jupyter = True).
displacy.render(doc, style = 'dep', jupyter = True)

In [61]:
# Now let's customize the visualization a bit by passing displaCy
# an `options` dictionary.

options = {
    'distance': 110,     # spacing between words
    'compact': True,     # a real boolean: the string 'True' only worked because it is truthy
    'color': 'yellow',   # text color
    'bg': '#09a3d5',     # background color
    'font': 'Times',     # the option key is `font`; `fonts` is not recognized and was ignored
}

# bg -> background color in HEX format
# compact -> compacts the curved arrows into a tighter layout

In [60]:
# Same rendering call as before, now with the custom `options` dictionary applied.
displacy.render(doc, style = 'dep', jupyter = True, options = options)

# cool :)

In [62]:
# Visualizing multiple sentences: start from a doc that contains more than one sentence.

doc2 = nlp(u"This is a sentence. This is a another sentence, possibly longer than the first.")

In [63]:
# Collect the doc's sentences into a list so they can be passed to displaCy together.
spans = list(doc2.sents)

In [64]:
# Display the collected sentences.
spans

[This is a sentence.,
 This is a another sentence, possibly longer than the first.]

In [None]:
# Serve the sentence visualizations over HTTP instead of rendering inline;
# the output below shows the server listening on port 5000.
displacy.serve(spans, style = 'dep', options = {'distance':110})


    Serving on port 5000...
    Using the 'dep' visualizer



127.0.0.1 - - [05/Aug/2019 10:57:12] "GET / HTTP/1.1" 200 10329
127.0.0.1 - - [05/Aug/2019 10:57:12] "GET /favicon.ico HTTP/1.1" 200 10329


In [None]:
# hop over to localhost -> 127.0.0.1:5000 to view the results!