In [2]:
import nltk
from nltk.chunk.regexp import ChunkString, ChunkRule 
from nltk.tree import Tree 
  
# ChunkString() starts with the flat tree 
tree = Tree('S', [('the', 'DT'), ('book', 'NN'), 
               ('has', 'VBZ'), ('many', 'JJ'), ('chapters', 'NNS')]) 
tree.draw()

In [3]:
chunk_string = ChunkString(tree) 
print ("Chunk String : ", chunk_string) 

Chunk String :   <DT>  <NN>  <VBZ>  <JJ>  <NNS> 


In [4]:
grammar = r"""
  NP: {<DT|JJ|NN.*>+}          # Chunk sequences of DT, JJ, NN
  PP: {<IN><NP>}               # Chunk prepositions followed by NP
  VP: {<VB.*>} # Chunk verbs and their arguments
        
  """

In [7]:
chunker = nltk.RegexpParser(grammar) 
sent=chunker.parse(tree)
sent.draw()


In [8]:
text = "My mother is cooking food for all the family member"
tokens = nltk.word_tokenize(text)
print(tokens)
tag = nltk.pos_tag(tokens)
print(tag)
grammar = "NP: {<DT>?<JJ>*<NN>}"
cp  =nltk.RegexpParser(grammar)
result = cp.parse(tag)
print(result)
result.draw()   

['My', 'mother', 'is', 'cooking', 'food', 'for', 'all', 'the', 'family', 'member']
[('My', 'PRP$'), ('mother', 'NN'), ('is', 'VBZ'), ('cooking', 'VBG'), ('food', 'NN'), ('for', 'IN'), ('all', 'PDT'), ('the', 'DT'), ('family', 'NN'), ('member', 'NN')]
(S
  My/PRP$
  (NP mother/NN)
  is/VBZ
  cooking/VBG
  (NP food/NN)
  for/IN
  all/PDT
  (NP the/DT family/NN)
  (NP member/NN))


In [10]:
chunker = RegexpParser(r''' 
NP: 
{<DT><NN.*><.*>*<NN.*>} 
}<VB.*>{ 
''') 
sent = [('the', 'DT'), ('sushi', 'NN'), ('roll', 'NN'), ('was', 'VBD'),  
        ('filled', 'VBN'), ('with', 'IN'), ('the', 'DT'), ('fish', 'NN')] 
result2=chunker.parse(sent) 
print(result2)
result2.draw()

(S
  (NP the/DT sushi/NN roll/NN)
  was/VBD
  filled/VBN
  (NP with/IN the/DT fish/NN))


In [11]:
sentence=nltk.word_tokenize("each one plant one")
sentence1=nltk.word_tokenize("Plants required light and water to grow")
sentence=nltk.pos_tag(sentence)
sentence1=nltk.pos_tag(sentence1)
print(sentence)
print(sentence1)

[('each', 'DT'), ('one', 'CD'), ('plant', 'NN'), ('one', 'CD')]
[('Plants', 'NNS'), ('required', 'VBN'), ('light', 'JJ'), ('and', 'CC'), ('water', 'NN'), ('to', 'TO'), ('grow', 'VB')]


In [10]:
nltk.help.upenn_tagset('VBG')

VBG: verb, present participle or gerund
    telegraphing stirring focusing angering judging stalling lactating
    hankerin' alleging veering capping approaching traveling besieging
    encrypting interrupting erasing wincing ...


In [12]:
from nltk.wsd import lesk
print(lesk(sentence, 'plant'))
print(lesk(sentence1, 'plant','n'))

Synset('plant.v.06')
Synset('plant.n.04')


In [14]:
from nltk.corpus import wordnet as wn
for ss in wn.synsets('plant'):
    print(ss,ss.definition())

Synset('plant.n.01') buildings for carrying on industrial labor
Synset('plant.n.02') (botany) a living organism lacking the power of locomotion
Synset('plant.n.03') an actor situated in the audience whose acting is rehearsed but seems spontaneous to the audience
Synset('plant.n.04') something planted secretly for discovery by another
Synset('plant.v.01') put or set (seeds, seedlings, or plants) into the ground
Synset('implant.v.01') fix or set securely or deeply
Synset('establish.v.02') set up or lay the groundwork for
Synset('plant.v.04') place into a river
Synset('plant.v.05') place something or someone in a certain position in order to secretly observe or deceive
Synset('plant.v.06') put firmly in the mind


In [15]:
import nltk


my_sent = "WASHINGTON -- In the wake of a string of abuses by New York police officers in the 1990s, Loretta E. Lynch, the top federal prosecutor in Brooklyn, spoke forcefully about the pain of a broken trust that African-Americans felt and said the responsibility for repairing generations of miscommunication and mistrust fell to law enforcement."

parse_tree = nltk.ne_chunk(nltk.tag.pos_tag(nltk.word_tokenize(my_sent)), binary=True)  # POS tagging before chunking!

parse_tree.draw()
print(parse_tree)


(S
  (NE WASHINGTON/NNP)
  --/:
  In/IN
  the/DT
  wake/NN
  of/IN
  a/DT
  string/NN
  of/IN
  abuses/NNS
  by/IN
  (NE New/NNP York/NNP)
  police/NN
  officers/NNS
  in/IN
  the/DT
  1990s/CD
  ,/,
  (NE Loretta/NNP)
  E./NNP
  Lynch/NNP
  ,/,
  the/DT
  top/JJ
  federal/JJ
  prosecutor/NN
  in/IN
  (NE Brooklyn/NNP)
  ,/,
  spoke/VBD
  forcefully/RB
  about/IN
  the/DT
  pain/NN
  of/IN
  a/DT
  broken/JJ
  trust/NN
  that/IN
  African-Americans/NNP
  felt/VBD
  and/CC
  said/VBD
  the/DT
  responsibility/NN
  for/IN
  repairing/VBG
  generations/NNS
  of/IN
  miscommunication/NN
  and/CC
  mistrust/NN
  fell/VBD
  to/TO
  law/NN
  enforcement/NN
  ./.)


In [16]:
named_entities = []

for t in parse_tree.subtrees():
    if t.label() == 'NE':
        named_entities.append(t)
        # named_entities.append(list(t))  # if you want to save a list of tagged words instead of a tree

print(named_entities)

[Tree('NE', [('WASHINGTON', 'NNP')]), Tree('NE', [('New', 'NNP'), ('York', 'NNP')]), Tree('NE', [('Loretta', 'NNP')]), Tree('NE', [('Brooklyn', 'NNP')])]


In [17]:
from nltk.tree import Tree

txt="WASHINGTON -- In the wake of a string of abuses by New York police officers in the 1990s, Loretta E. Lynch, the top federal prosecutor in Brooklyn, spoke forcefully about the pain of a broken trust that African-Americans felt and said the responsibility for repairing generations of miscommunication and mistrust fell to law enforcement."

pos_tag = nltk.pos_tag(txt.split())
parse_tree = nltk.ne_chunk(pos_tag )
# print(chunk)
parse_tree.draw()
NE=[]
for chunk in parse_tree:
    if hasattr(chunk, 'label'):
        NE=(chunk.label(), ' '.join(c[0] for c in chunk))
        print(NE)
        

('GPE', 'WASHINGTON')
('GPE', 'New York')
('PERSON', 'Loretta E.')


In [18]:
word = nltk.word_tokenize(my_sent)   
pos_tag = nltk.pos_tag(word)   
chunk = nltk.ne_chunk(pos_tag)   
NE = [ " ".join(w for w, t in name) for name in chunk if isinstance(name, nltk.Tree)]   
print (NE)

['WASHINGTON', 'New York', 'Loretta E. Lynch', 'Brooklyn']


In [23]:
word = nltk.word_tokenize(my_sent)   
pos_tag = nltk.pos_tag(word)   
chunk = nltk.ne_chunk(pos_tag)   
for name in chunk:
    if isinstance(name,nltk.Tree):
        ne=" ".join(w for w, t in name)
        print(ne)
       

# NE = [ " ".join(w for w, t in name) for name in chunk if isinstance(name, nltk.Tree)]   
# print (NE)

WASHINGTON
New York
Loretta E. Lynch
Brooklyn


In [1]:
from nltk import pos_tag
from nltk import RegexpParser
text ="Tom should have gone to the dentist yesterday.".split()
print("After Split:",text)
tokens_tag = pos_tag(text)
print("After Token:",tokens_tag)
patterns= """mychunk:{<NN.?>*<VBD.?>*<JJ.?>*<CC>?}"""
chunker = RegexpParser(patterns)
print("After Regex:",chunker)
output = chunker.parse(tokens_tag)
print("After Chunking",output)

After Split: ['Tom', 'should', 'have', 'gone', 'to', 'the', 'dentist', 'yesterday.']
After Token: [('Tom', 'NNP'), ('should', 'MD'), ('have', 'VB'), ('gone', 'VBN'), ('to', 'TO'), ('the', 'DT'), ('dentist', 'NN'), ('yesterday.', 'NN')]
After Regex: chunk.RegexpParser with 1 stages:
RegexpChunkParser with 1 rules:
       <ChunkRule: '<NN.?>*<VBD.?>*<JJ.?>*<CC>?'>
After Chunking (S
  (mychunk Tom/NNP)
  should/MD
  have/VB
  gone/VBN
  to/TO
  the/DT
  (mychunk dentist/NN yesterday./NN))


In [19]:
sentence="my mother is cooking food"


In [20]:
chunk=nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sentence)))
print(chunk)

(S my/PRP$ mother/NN is/VBZ cooking/VBG food/NN)
