In [1]:
# Using NLTK

import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk

# Fetch the tokenizer, tagger, chunker and word-list resources used below.
for resource in ('punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words'):
    nltk.download(resource)

# Sample sentence; later cells reuse this same `text` variable.
text = "Barack Obama was the 44th president of the United States and he was born on August 4, 1961."

# Split the sentence into tokens, then attach a part-of-speech tag to each.
tokens = word_tokenize(text)
tags = pos_tag(tokens)
print("POS tagging with NLTK:")
print(tags)

# Chunk the tagged tokens: recognised entities come back as nltk.Tree nodes,
# everything else stays a plain (word, tag) tuple.
ner_tree = ne_chunk(tags)
print("\nNER with NLTK:")
for node in ner_tree:
    if not isinstance(node, nltk.Tree):
        continue  # untagged token, not an entity chunk
    entity = " ".join(word for word, _ in node.leaves())
    print(entity, "->", node.label())

[nltk_data] Downloading package punkt to /home/vc/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/vc/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /home/vc/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /home/vc/nltk_data...
[nltk_data]   Package words is already up-to-date!


POS tagging with NLTK:
[('Barack', 'NNP'), ('Obama', 'NNP'), ('was', 'VBD'), ('the', 'DT'), ('44th', 'JJ'), ('president', 'NN'), ('of', 'IN'), ('the', 'DT'), ('United', 'NNP'), ('States', 'NNPS'), ('and', 'CC'), ('he', 'PRP'), ('was', 'VBD'), ('born', 'VBN'), ('on', 'IN'), ('August', 'NNP'), ('4', 'CD'), (',', ','), ('1961', 'CD'), ('.', '.')]

NER with NLTK:
Barack -> PERSON
Obama -> PERSON
United States -> GPE


In [2]:
# Using spaCy

import spacy

# Load the small English pipeline and run it over the shared sample sentence.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# Part-of-speech tags: one (token text, pos_) pair per token in the document.
print("POS tagging with spaCy:")
pos_pairs = [(token.text, token.pos_) for token in doc]
for word, pos in pos_pairs:
    print(word, "->", pos)

# Named entities: one (span text, label_) pair per entry in doc.ents.
print("\nNER with spaCy:")
entity_pairs = [(ent.text, ent.label_) for ent in doc.ents]
for span_text, label in entity_pairs:
    print(span_text, "->", label)

POS tagging with spaCy:
Barack -> PROPN
Obama -> PROPN
was -> AUX
the -> DET
44th -> ADJ
president -> NOUN
of -> ADP
the -> DET
United -> PROPN
States -> PROPN
and -> CCONJ
he -> PRON
was -> AUX
born -> VERB
on -> ADP
August -> PROPN
4 -> NUM
, -> PUNCT
1961 -> NUM
. -> PUNCT

NER with spaCy:
Barack Obama -> PERSON
44th -> ORDINAL
the United States -> GPE
August 4, 1961 -> DATE


In [3]:
# Using TextBlob
from textblob import TextBlob

# Wrap the shared sample sentence so TextBlob's tagging helpers can be used.
blob = TextBlob(text)

# POS Tagging
# blob.tags is a list of (word, tag) pairs. In the recorded output for this
# cell the punctuation tokens are absent from the list.
print("POS tagging with TextBlob:")
print(blob.tags)

# Noun phrase extraction
# Note: TextBlob does not provide a named-entity recognizer. `noun_phrases`
# is noun-phrase chunking: it yields lowercased phrases with no entity labels
# (PERSON, GPE, DATE, ...), so treat it only as a rough stand-in for NER.
print("\nNoun phrases with TextBlob:")
# `phrase` rather than `np`: the loop variable stays in the notebook namespace
# after the cell runs and would shadow the conventional numpy alias.
for phrase in blob.noun_phrases:
    print(phrase)

POS tagging with TextBlob:
[('Barack', 'NNP'), ('Obama', 'NNP'), ('was', 'VBD'), ('the', 'DT'), ('44th', 'JJ'), ('president', 'NN'), ('of', 'IN'), ('the', 'DT'), ('United', 'NNP'), ('States', 'NNPS'), ('and', 'CC'), ('he', 'PRP'), ('was', 'VBD'), ('born', 'VBN'), ('on', 'IN'), ('August', 'NNP'), ('4', 'CD'), ('1961', 'CD')]

NER with TextBlob:
barack obama
44th president
august
