PARSING TREE

Tokenize and perform part-of-speech tagging for the following sentences
using a Python program.
- > Draw the parse tree for each sentence using a Python program.
- > They wind back the clock, while we chase after the wind.
- > The curious child opened the old wooden box near the river bank.
- > The scientist who discovered the vaccine received an international award.
- > After the heavy rain stopped, the children played happily in the garden.

In [19]:
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk import CFG
from nltk.parse import ChartParser

# -----------------------------
# DOWNLOAD REQUIRED PACKAGES
# -----------------------------
# Fetch the tokenizer and POS-tagger models used by word_tokenize / pos_tag.
# Recent NLTK releases load the tokenizer from 'punkt_tab' and the tagger from
# 'averaged_perceptron_tagger_eng', while older releases use the un-suffixed
# names, so both generations are requested to keep the script working
# regardless of the installed NLTK version.
for resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)

# -----------------------------
# SENTENCES
# -----------------------------
# Input sentences, one per line of the literal below. Trailing full stops are
# left out, and the comma of the last sentence is written as its own
# space-separated token.
sentences = [
    line.strip()
    for line in """
    They wind back the clock while we chase after the wind
    The curious child opened the old wooden box near the river bank
    The scientist who discovered the vaccine received an international award
    After the heavy rain stopped , the children played happily in the garden
    """.splitlines()
    if line.strip()
]

# -----------------------------
# PART A: TOKENIZATION + POS TAGGING
# -----------------------------
print("\n=========== TOKENIZATION & POS TAGGING ===========\n")

# For every sentence, show the raw text, its word tokens and the
# (token, tag) pairs produced by the tagger, followed by one blank line.
for sentence in sentences:
    tokens = word_tokenize(sentence)
    tagged = pos_tag(tokens)
    report = (
        f"Sentence: {sentence}",
        f"Tokens : {tokens}",
        f"POS Tags : {tagged}",
        "",  # trailing empty entry yields the blank separator line
    )
    print(*report, sep="\n")

# -----------------------------
# PART B: CFG GRAMMAR
# -----------------------------
# Hand-written context-free grammar covering the four input sentences.
# Phrase-structure rules come first, followed by the lexicon that maps each
# pre-terminal category to the exact (case-sensitive) tokens it may produce.
#
# 'VP -> V Adv PP' is required for "played happily in the garden": without it
# the verb phrase can absorb either the adverb or the prepositional phrase but
# not both, and the last sentence yields no parse tree.
grammar = CFG.fromstring("""
S -> NP VP
S -> S Conj S
S -> SubClause ',' S

SubClause -> SubConj NP VP

NP -> Det N
NP -> Det Adj N
NP -> Det Adj Adj N
NP -> Det N N
NP -> N
NP -> Pron
NP -> NP PP
NP -> NP RelClause

VP -> V
VP -> V NP
VP -> V NP PP
VP -> V PP
VP -> V Adv
VP -> V Adv PP
VP -> V Part
VP -> V Part NP

PP -> P NP

RelClause -> RelPron VP

Det -> 'The' | 'the' | 'an'
Adj -> 'curious' | 'old' | 'wooden' | 'heavy' | 'international'
N -> 'child' | 'box' | 'river' | 'bank' | 'scientist' | 'vaccine' | 'award' | 'rain' | 'children' | 'garden' | 'clock' | 'wind'
V -> 'opened' | 'received' | 'played' | 'stopped' | 'wind' | 'chase' | 'discovered'
Pron -> 'They' | 'we'
Adv -> 'happily'
P -> 'near' | 'in' | 'after'
Part -> 'back'
RelPron -> 'who'
Conj -> 'while'
SubConj -> 'After'
""")

# Chart parser built over the grammar above; used to enumerate all parses.
parser = ChartParser(grammar)

# -----------------------------
# PARSING TREE
# -----------------------------
print("\n=========== PARSING TREES ===========\n")

# Parse each sentence with the chart parser, print every tree found in
# bracketed form, and open a drawing window for it.
for sentence in sentences:
    print("\nParsing:", sentence)
    tokens = word_tokenize(sentence)

    try:
        # Materialise the parse iterator so an empty result can be detected.
        trees = list(parser.parse(tokens))
    except ValueError as err:
        # The parser raises ValueError when a token is not covered by the
        # grammar's lexicon; report the reason instead of hiding it.
        print("Parsing error:", err)
        continue

    if not trees:
        print("No parse tree found.")
        continue

    for tree in trees:
        print(tree)
        try:
            tree.draw()   # Opens parse tree window
        except Exception as err:
            # Drawing needs a working GUI toolkit; a failure here is not a
            # parsing failure, so report it separately and keep going.
            print("Could not draw tree:", err)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vijay\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\vijay\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!




Sentence: They wind back the clock while we chase after the wind
Tokens : ['They', 'wind', 'back', 'the', 'clock', 'while', 'we', 'chase', 'after', 'the', 'wind']
POS Tags : [('They', 'PRP'), ('wind', 'VBP'), ('back', 'RB'), ('the', 'DT'), ('clock', 'NN'), ('while', 'IN'), ('we', 'PRP'), ('chase', 'VBP'), ('after', 'IN'), ('the', 'DT'), ('wind', 'NN')]

Sentence: The curious child opened the old wooden box near the river bank
Tokens : ['The', 'curious', 'child', 'opened', 'the', 'old', 'wooden', 'box', 'near', 'the', 'river', 'bank']
POS Tags : [('The', 'DT'), ('curious', 'JJ'), ('child', 'NN'), ('opened', 'VBD'), ('the', 'DT'), ('old', 'JJ'), ('wooden', 'NN'), ('box', 'NN'), ('near', 'IN'), ('the', 'DT'), ('river', 'NN'), ('bank', 'NN')]

Sentence: The scientist who discovered the vaccine received an international award
Tokens : ['The', 'scientist', 'who', 'discovered', 'the', 'vaccine', 'received', 'an', 'international', 'award']
POS Tags : [('The', 'DT'), ('scientist', 'NN'), ('wh