In [150]:
from nltk.tokenize import word_tokenize
sentence = "Jim is bringing his bulldog to Walmart?"
tokens = word_tokenize(sentence)
print (len(tokens), tokens)

8 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'to', 'Walmart', '?']


In [151]:
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))
tokens = [w for w in tokens if not w in stop_words]
print (len(tokens), tokens)

5 ['Jim', 'bringing', 'bulldog', 'Walmart', '?']


In [152]:
from nltk.stem import LancasterStemmer
ps = LancasterStemmer()
for t in tokens:
    print(ps.stem(t))

jim
bring
bulldog
walmart
?


In [153]:
import nltk
# the off-the-shelf tagger still uses the Penn Treebank tagset
# tags: https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
tags = nltk.pos_tag(tokens)
print (tags, "Nouns:", [t for t in tags if t[1]=='NN'])

[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('Walmart', 'NNP'), ('?', '.')] Nouns: []


In [154]:
# nltk.download('maxent_ne_chunker')
# nltk.download('words')
from nltk import word_tokenize, pos_tag, ne_chunk
from nltk.chunk import conlltags2tree, tree2conlltags

# chunk the sentence
ne_tree = ne_chunk(pos_tag(word_tokenize(sentence)))
 
# IOB transform
# B-{CHUNK_TYPE} – for the word in the Beginning chunk
# I-{CHUNK_TYPE} – for words Inside the chunk
# O – Outside any chunk

iob_tagged = tree2conlltags(ne_tree)
print (iob_tagged)
print ([i for i in iob_tagged if i[2]=='B-ORGANIZATION'])
# http://streamhacker.com/2008/12/29/how-to-train-a-nltk-chunker/

[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('to', 'TO', 'O'), ('Walmart', 'NNP', 'B-PERSON'), ('?', '.', 'O')]
[]


In [155]:
noun = [i for i in iob_tagged if i[1]=='NN' ][0]
print (noun)
from nltk.corpus import wordnet
syns = wordnet.synsets(noun[0])
print(syns[0].definition())

('bulldog', 'NN', 'O')
a sturdy thickset short-haired breed with a large head and strong undershot lower jaw; developed originally in England for bull baiting


In [156]:
w1 = wordnet.synset('bulldog.n.01')
w2 = wordnet.synset('car.n.01')
print(w1.wup_similarity(w2))
w1 = wordnet.synset('bulldog.n.01')
w2 = wordnet.synset('poodle.n.01')
print(w1.wup_similarity(w2))

0.36363636363636365
0.8387096774193549


In [157]:
def syn_ant(word):
    synonyms = []
    antonyms = []
    for syn in wordnet.synsets(word):
        for l in syn.lemmas():
            synonyms.append(l.name())
            if l.antonyms():
                antonyms.append(l.antonyms()[0].name())
    return synonyms, antonyms

s, a = syn_ant(noun[0])
print(set(s))
print(set(a))
s, a = syn_ant('rich')
print(set(s))
print(set(a))

{'bulldog', 'English_bulldog'}
set()
{'rich', 'deep', 'ample', 'full-bodied', 'robust', 'plenteous', 'rich_people', 'fertile', 'productive', 'racy', 'copious', 'plentiful', 'fat'}
{'poor_people', 'poor', 'lean'}
