In [1]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [2]:
from nltk.chunk import RegexpParser
from nltk.tokenize import word_tokenize

In [14]:
sentence = "Education is the transmission of knowledge, skills, and character traits. "

### Tokenization

In [15]:
tokens = word_tokenize(sentence)

In [16]:
tokens

['Education',
 'is',
 'the',
 'transmission',
 'of',
 'knowledge',
 ',',
 'skills',
 ',',
 'and',
 'character',
 'traits',
 '.']

### POS tagging

In [17]:
pos_tags = nltk.pos_tag(tokens)

In [18]:
pos_tags

[('Education', 'NN'),
 ('is', 'VBZ'),
 ('the', 'DT'),
 ('transmission', 'NN'),
 ('of', 'IN'),
 ('knowledge', 'NN'),
 (',', ','),
 ('skills', 'NNS'),
 (',', ','),
 ('and', 'CC'),
 ('character', 'NN'),
 ('traits', 'NNS'),
 ('.', '.')]

### Chunking patterns

In [19]:
chunk_patterns = r"""
    NP: {<DT>?<JJ>*<NN>}  # Chunk noun phrases
    VP: {<VB.*><NP|PP>}  # Chunk verb phrases
"""

In [20]:
chunk_patterns

'\n    NP: {<DT>?<JJ>*<NN>}  # Chunk noun phrases\n    VP: {<VB.*><NP|PP>}  # Chunk verb phrases\n'

### Create a chunk parser

In [21]:
chunk_parser = RegexpParser(chunk_patterns)

In [22]:
chunk_parser

<chunk.RegexpParser with 2 stages>

### Perform chunking

In [23]:
result = chunk_parser.parse(pos_tags)

In [24]:
print(result)

(S
  (NP Education/NN)
  (VP is/VBZ (NP the/DT transmission/NN))
  of/IN
  (NP knowledge/NN)
  ,/,
  skills/NNS
  ,/,
  and/CC
  (NP character/NN)
  traits/NNS
  ./.)
