In [1]:
import spacy

# Load the `en_core_web_sm` pipeline; the cells below call it as `nlp`.
nlp = spacy.load("en_core_web_sm")

In [2]:
# Run the pipeline on a sample sentence; `doc` is inspected by the next cells.
doc = nlp("the quick brown fox jumped over the lazy dog's back.")

In [3]:
# Print the raw text of the parsed sentence (per-token tags are printed in the following cells).
print(doc.text)

the quick brown fox jumped over the lazy dog's back.


In [4]:
# Inspect a single token: text, coarse POS, fine-grained tag, and the tag's description.
tok = doc[4]
print(tok.text, tok.pos_, tok.tag_, spacy.explain(tok.tag_))

jumped VERB VBD verb, past tense


In [5]:
# One aligned row per token: text, coarse POS, fine-grained tag, tag description.
for tok in doc:
    row = f"{tok.text:<10} {tok.pos_:<8} {tok.tag_:<6} {spacy.explain(tok.tag_)}"
    print(row)

the        DET      DT     determiner
quick      ADJ      JJ     adjective (English), other noun-modifier (Chinese)
brown      ADJ      JJ     adjective (English), other noun-modifier (Chinese)
fox        NOUN     NN     noun, singular or mass
jumped     VERB     VBD    verb, past tense
over       ADP      IN     conjunction, subordinating or preposition
the        DET      DT     determiner
lazy       ADJ      JJ     adjective (English), other noun-modifier (Chinese)
dog        NOUN     NN     noun, singular or mass
's         PART     POS    possessive ending
back       NOUN     NN     noun, singular or mass
.          PUNCT    .      punctuation mark, sentence closer


In [6]:
# Fine-grained tags can separate verb forms that are spelled the same.
# In this sentence the recorded output tags "read" as VBP (present tense).
doc = nlp("I read books on nlp")
verb = doc[1]
print(f"{verb.text:<10} {verb.pos_:<8} {verb.tag_:<6} {spacy.explain(verb.tag_)}")

read       VERB     VBP    verb, non-3rd person singular present


In [7]:
# Same verb, different context: the recorded output tags "read" as VBD (past tense) here.
doc = nlp("I read a book on nlp")
verb = doc[1]
print(f"{verb.text:<10} {verb.pos_:<8} {verb.tag_:<6} {spacy.explain(verb.tag_)}")

read       VERB     VBD    verb, past tense


In [9]:
# Count how often each coarse POS occurs.
# The resulting dict is keyed by integer IDs, not by tag names.
doc = nlp("the quick brown fox jumped over the lazy dog's back.")
POS_counts = doc.count_by(spacy.attrs.POS)
POS_counts

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [10]:
# Decode a POS ID back to its name via the vocab.
# The ID is taken from a token (doc[0] is "the", a determiner) instead of the
# literal 90, because these numeric IDs may differ between spaCy versions.
doc.vocab[doc[0].pos].text

'DET'

In [11]:
# Print each POS count with its readable name, ordered by POS ID.
for pos_id in sorted(POS_counts):
    print(f"{pos_id}. {doc.vocab[pos_id].text:<5}:{POS_counts[pos_id]}")

84. ADJ  :3
85. ADP  :1
90. DET  :2
92. NOUN :3
94. PART :1
97. PUNCT:1
100. VERB :1


In [13]:
# Count fine-grained tags and print them with readable names, ordered by tag ID.
TAG_counts = doc.count_by(spacy.attrs.TAG)
for tag_id, n_tokens in sorted(TAG_counts.items()):
    tag_name = doc.vocab[tag_id].text
    print(f"{tag_id}. {tag_name:<4}:{n_tokens}")

74. POS :1
1292078113972184607. IN  :1
10554686591937588953. JJ  :3
12646065887601541794. .   :1
15267657372422890137. DT  :2
15308085513773655218. NN  :3
17109001835818727656. VBD :1


In [15]:
# Count syntactic dependency labels; the dict is keyed by integer IDs.
DEP_counts = doc.count_by(spacy.attrs.DEP)
# Size the label column from the data: a fixed width of 4 is too narrow for
# labels such as "nsubj" and "punct", which pushed their counts out of line.
dep_width = max((len(doc.vocab[k].text) for k in DEP_counts), default=0)
for k, v in sorted(DEP_counts.items()):
    print(f'{k}. {doc.vocab[k].text:{dep_width}}:{v}')

402. amod:3
415. det :2
429. nsubj:1
439. pobj:1
440. poss:1
443. prep:1
445. punct:1
8110129090154140942. case:1
8206900633647566924. ROOT:1


**Visualizing parts of speech with displaCy**

In [16]:
import spacy
from spacy import displacy

# NOTE(review): `spacy` and `nlp` were already set up in the first cell; this repeats that setup.
nlp = spacy.load("en_core_web_sm")

In [17]:
# Render the dependency parse inline in the notebook.
# The 'distance' option controls the spacing between words.
doc = nlp("the quick brown fox jumped over the lazy dog's back.")
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"distance": 110},
)