In [2]:
# POS: Part of the Speech Tagging
# -------------------------------
# Definition:
#   Is a NLP process that categorizes words in a text in correspondence 
#   with a particular part of the speech (a noun, a verbe, an adjective, etc),
#   depending on the definition of the word and its context [1].

# Notes
# - Within a sentence the same words in a different order can mean something different.
# - A POS Tag that is assigned depending of the current state (meaning) of the current word

# The process of assigning a specific tag to a word is referred to a POS tagging.

# Resources:
# - 1. https://towardsdatascience.com/part-of-speech-tagging-for-beginners-3a0754b2ebba
# - 2. https://spacy.io/usage/linguistic-features

*Part Of the Specch Tagging*
![](images/pos.png)

In [3]:
import spacy

nlp = spacy.load('en_core_web_sm')

In [9]:
text = "I'd like to order the biggest pizza you have. The ingredients are chicken, cheese, tomato and pepperoni"
doc = nlp(text)

In [18]:
for tk in doc:
    print(f"{tk.text:{10}}{tk.pos_:{10}}{tk.tag_:{10}}{spacy.explain(tk.tag_):{10}}")

I         PRON      PRP       pronoun, personal
'd        VERB      MD        verb, modal auxiliary
like      VERB      VB        verb, base form
to        PART      TO        infinitival to
order     VERB      VB        verb, base form
the       DET       DT        determiner
biggest   ADJ       JJS       adjective, superlative
pizza     NOUN      NN        noun, singular or mass
you       PRON      PRP       pronoun, personal
have      VERB      VBP       verb, non-3rd person singular present
.         PUNCT     .         punctuation mark, sentence closer
The       DET       DT        determiner
ingredientsNOUN      NNS       noun, plural
are       VERB      VBP       verb, non-3rd person singular present
chicken   NOUN      NN        noun, singular or mass
,         PUNCT     ,         punctuation mark, comma
cheese    NOUN      NN        noun, singular or mass
,         PUNCT     ,         punctuation mark, comma
tomato    NOUN      NN        noun, singular or mass
and       CCONJ 

In [24]:
# Testing text context
doc1 = nlp("I read books on NLP") # Present context
doc2 = nlp("I read a book on NLP") # Past context
# Let's look the word read
tk1 = doc1[1]
tk2 = doc2[1]

def show_token_info(token):
    print(f"{token.text:{10}} {token.pos_:{10}} {token.tag_:{10}} {spacy.explain(token.tag_)}")

print("Token for doc1")
show_token_info(tk1)
print("-"*30)
print("Token for doc2")
show_token_info(tk2)

Token for doc1
read       VERB       VBP        verb, non-3rd person singular present
------------------------------
Token for doc2
read       VERB       VBD        verb, past tense


In [29]:
# Counting number of times a POS tag appears in a text
# doc.count_by(attr) -> Returns a dictionary where key is an id and the value is the count -> Dict[int, int]
pos_tags_count = doc.count_by(spacy.attrs.POS)
print(pos_tags_count)
for k,v in pos_tags_count.items():
    print(f"{doc.vocab[k].text}: {v}")


{96: 3, 99: 5, 83: 1, 88: 1, 89: 2, 91: 6, 93: 1, 94: 2}
PUNCT: 3
VERB: 5
ADJ: 1
CCONJ: 1
DET: 2
NOUN: 6
PART: 1
PRON: 2


In [32]:
# Visualizing POS with displacy
from spacy import displacy

displacy.render(doc, style='dep', jupyter=True)