In [30]:
# task3.py
# How to install and use nltk?
# https://www.nltk.org/install.html

# How to download nltk data?
# https://www.nltk.org/data.html
# command line $ python3
# python3 >> import nltk
# python3 >> nltk.download()
# download directory: /usr/local/share/nltk_data
# download what you will need

import task2
import task3_1

# load corpus
corpus = task2._load_corpus("corpus.txt")

## 3.1
sentences, words_all = task3_1._task3_1(corpus)

# TEST
# print("sentences")
# print(sentences[1:2][0])

# print("words_all")
# print(words_all[1:2][0])

## 3.3



sentences
The name "music" contains two ideas, both of them important in our modern use of the term: The general meaning is that of "a pleasing modulation of sounds."
words_all
['The', 'name', '``', 'music', "''", 'contains', 'two', 'ideas', ',', 'both', 'of', 'them', 'important', 'in', 'our', 'modern', 'use', 'of', 'the', 'term', ':', 'The', 'general', 'meaning', 'is', 'that', 'of', '``', 'a', 'pleasing', 'modulation', 'of', 'sounds', '.', "''"]


In [6]:
## 3.4 (updated version)
# perform dependency parsing 
# or full-syntactic parsing 
# to parse tree-based patterns as features

### 3.4.0 preparation
# Setup CoreNLP with Python:
# https://www.khalidalnajjar.com/setup-use-stanford-corenlp-server-python/
# Download Stanford CoreNLP:
# https://stanfordnlp.github.io/CoreNLP/index.html#download
# Unzip to local directory:
# for example: ../resources/stanford-corenlp-full-2018-10-05/
# Run Stanford CoreNLP Server in command line:
# java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -annotators "tokenize,ssplit,pos,lemma,parse,sentiment" -port 9000 -timeout 30000

# nlkt.parse API:
# http://www.nltk.org/api/nltk.parse.html#nltk.parse.corenlp.GenericCoreNLPParser

### 3.4.1 CoreNLP Dependency Parser
from nltk.parse.corenlp import CoreNLPDependencyParser as DP
dep_parser = DP(url='http://localhost:9000')

# example:
# parse, = dep_parser.raw_parse(
#     'The quick brown fox jumps over the lazy dog.'
# )
# print(parse.to_conll(4))

parse = dep_parser.raw_parse_sents(sentences[5:6])
print(sentences[5:6])

# show parsed results:
for itr_tree in parse:
    for tree in itr_tree:
        print(tree.to_conll(4))
#         print(tree.tree())
    print()

['Music has to do with tones, sounds selected on account of their musical quality and relations.']
Music	NN	2	nsubj
has	VBZ	0	ROOT
to	TO	4	mark
do	VB	2	xcomp
with	IN	6	case
tones	NNS	4	nmod
,	,	2	punct
sounds	VBZ	9	auxpass
selected	VBN	2	dep
on	IN	11	case
account	NN	9	nmod
of	IN	15	case
their	PRP$	15	nmod:poss
musical	JJ	15	amod
quality	NN	11	nmod
and	CC	15	cc
relations	NNS	15	conj
.	.	2	punct




In [31]:
### 3.4.2 CoreNLP Parser
from nltk.parse.corenlp import CoreNLPParser as CP
parser = CP(url='http://localhost:9000')

# example:
# parse, = parser.raw_parse(
#     'The quick brown fox jumps over the lazy dog.'
# )
# print(parse.pretty_print())

parse = parser.raw_parse_sents(sentences[5:6])
print(sentences[5:6])

# show parsed results:
for itr_tree in parse:
    for tree in itr_tree:
        tree.pretty_print()
    print()

['Music has to do with tones, sounds selected on account of their musical quality and relations.']
                                                  ROOT                                                              
                                                   |                                                                 
                                                  SINV                                                              
            _______________________________________|______________________________________________________________   
           S                          |    |                                   |                                  | 
   ________|___                       |    |                                   |                                  |  
  |            VP                     |    |                                   |                                  | 
  |     _______|___                   |    |                                   

In [8]:
## 3.5
# using WordNet,
# extract hypernymns, hyponyms, meronyms, and holonyms
# as features

# nltk WordNet Interface:
# http://www.nltk.org/howto/wordnet.html
from nltk.corpus import wordnet as wn

# function: get synsets of a word
# argument: a word string
# return: a list of synsets
def __synsets(word):
    return wn.synsets(word)

# function: get hypernymns of a synset
# argument: a synset
# return: a list of synsets
def __hypernymns(synset):
    return synset.hypernyms()

# function: get hyponyms of a synset
# argument: a synset
# return: a list of synsets
def __hyponyms(synset):
    return synset.hyponyms()

# function: get meronyms of a synset
# argument: a synset
# return: a list of synsets
def __meronyms(synset):
    return synset.member_holonyms()

# TODO?
# function: get holonyms of a synset
# argument: a synset
# return: a list of synsets
def __holonyms(synset):#parts
    # TEST
    for synset in wn.synsets('green'):
        for hypernym in synset.hypernyms():
            print(hypernym)
    print("----")
    for synset in wn.synsets('rice'):
        for hypernym in synset.part_holonyms():
            print(hypernym)
    return synset.part_holonyms()

# TEST
synsets = __synsets('rice')
synset = synsets[0]
print("synsets:", synsets, "\n")

hypernyms = __hypernymns(synset)
print("hypernyms:", hypernyms, "\n")

hyponyms = __hyponyms(synset)
print("hyponyms:", hyponyms, "\n")

meronyms = __meronyms(synset)
print("meronyms(being a member of):", meronyms, "\n")

holonyms = __holonyms(synset)
print("holonyms(having members of):", holonyms, "\n")

synsets: [Synset('rice.n.01'), Synset('rice.n.02'), Synset('rice.n.03'), Synset('rice.n.04'), Synset('rice.v.01')] 

hypernyms: [Synset('grain.n.02'), Synset('starches.n.01')] 

hyponyms: [Synset('brown_rice.n.01'), Synset('paddy.n.03'), Synset('white_rice.n.01')] 

meronyms(being a member of): [] 

Synset('chromatic_color.n.01')
Synset('tract.n.01')
Synset('environmentalist.n.01')
Synset('site.n.01')
Synset('vegetable.n.01')
Synset('ketamine.n.01')
Synset('discolor.v.03')
----
Synset('cultivated_rice.n.01')
holonyms(having members of): [] 

