In [3]:
from collections import deque

import joblib
import nltk
from nltk.corpus import wordnet as wn

from consts import PATH_TO_MASKED_SENTENCES_AMRS_WITH_LABEL2WORD

In [4]:
# Load the pre-computed container from disk. joblib.load is pickle-based and can
# execute arbitrary code, so only point it at files from a trusted source.
sentences_with_amr_container = joblib.load(PATH_TO_MASKED_SENTENCES_AMRS_WITH_LABEL2WORD)

In [5]:
# Take the first container entry; its element at index 1 exposes the graph
# (`graph_nx`) and a `label2word` mapping.
obj = sentences_with_amr_container[0]
graph = obj[1].graph_nx
label2words = obj[1].label2word

In [6]:
# Show the words stored in the label -> word mapping of this entry.
label2words.values()

dict_values(['slogan', 'company', 'and', 'expect', 'more', 'pay', 'less'])

In [7]:
# Pick one WordNet synset per word by its position in the `wn.synsets` result.
# NOTE(review): neither name is referenced again in the visible cells — confirm they are still needed.
more = wn.synsets('more')[0]
less = wn.synsets('less')[1]

In [8]:
def find_synsets(word):
    """Return the set of lemma names found across every WordNet synset of `word`."""
    return {
        lemma.name()
        for synset in wn.synsets(word)
        for lemma in synset.lemmas()
    }

In [9]:
def find_antonyms(word):
    """Collect the names of every antonym lemma reachable from any synset of `word`."""
    return {
        opposite.name()
        for synset in wn.synsets(word)
        for lemma in synset.lemmas()
        for opposite in lemma.antonyms()
    }

In [10]:
def check_if_words_are_antonyms(word1, word2):
    """Tell whether either word appears among the other's WordNet antonyms."""
    antonyms_of_first = find_antonyms(word1)
    antonyms_of_second = find_antonyms(word2)
    if word1 in antonyms_of_second:
        return True
    return word2 in antonyms_of_first

In [11]:
def check_if_words_are_synonyms(word1, word2):
    """Tell whether either word appears among the lemma names of the other's synsets."""
    names_for_first = find_synsets(word1)
    names_for_second = find_synsets(word2)
    if word1 in names_for_second:
        return True
    return word2 in names_for_first

In [12]:
def get_hypernyms_to_root(word):
    """Gather lemma names along the first hypernym path of every synset of `word`.

    Only element 0 of `hypernym_paths()` is walked per synset, so any further
    paths a synset may have are ignored.
    """
    collected = set()
    for synset in wn.synsets(word):
        first_path = synset.hypernym_paths()[0]
        for node in first_path:
            collected.update(lemma.name() for lemma in node.lemmas())
    return collected

In [13]:
def get_hypernyms_in_width_and_depth(word, level = 0):
    """Breadth-first collect hypernym lemma names up to `level` steps above `word`.

    Args:
        word: Word to start the search from.
        level: Number of hypernym levels to expand. With the default of 0 the
            start word is not expanded and an empty set is returned. A value
            the depth counter never reaches (e.g. a negative one) expands
            until no unvisited lemma name remains.

    Returns:
        Set with the lemma names of every hypernym synset encountered.
    """
    all_hypernyms = set()
    # deque gives O(1) pops from the left; `seen` records every name ever
    # queued so each name is expanded at most once, at its shallowest depth.
    queue = deque([(word, 0)])
    seen = {word}
    while queue:
        current, depth = queue.popleft()
        # Entries leave the queue in non-decreasing depth order, so reaching
        # `level` means everything shallower has already been expanded.
        if depth == level:
            break
        for syn in wn.synsets(current):
            for hypernym in syn.hypernyms():
                for lemma in hypernym.lemmas():
                    name = lemma.name()
                    all_hypernyms.add(name)
                    if name not in seen:
                        seen.add(name)
                        queue.append((name, depth + 1))
    return all_hypernyms

In [14]:
def get_all_hypernyms(word, level_in_depth = 0):
    """Merge the breadth-limited hypernym names with the root-path hypernym names."""
    breadth_limited = get_hypernyms_in_width_and_depth(word = word, level=level_in_depth)
    along_root_path = get_hypernyms_to_root(word = word)
    return breadth_limited.union(along_root_path)

In [15]:
# With level_in_depth=0 the breadth-first search stops before expanding anything,
# so only the root-path hypernyms of 'man' are shown here.
get_all_hypernyms('man', level_in_depth=0)

{'Isle_of_Man',
 'Man',
 'abstract_entity',
 'abstraction',
 'adult',
 'adult_male',
 'animal',
 'animate_being',
 'animate_thing',
 'artefact',
 'artifact',
 'assistant',
 'beast',
 'being',
 'body_servant',
 'brute',
 'cater',
 'causal_agency',
 'causal_agent',
 'cause',
 'chordate',
 'craniate',
 'creature',
 'do_work',
 'domestic_partner',
 'dry_land',
 'earth',
 'entity',
 'equipment',
 'eutherian',
 'eutherian_mammal',
 'fauna',
 'foot_soldier',
 'game_equipment',
 'gentleman',
 "gentleman's_gentleman",
 'give',
 'ground',
 'group',
 'grouping',
 'grownup',
 'help',
 'helper',
 'hominid',
 'homo',
 'human',
 'human_being',
 'human_beings',
 'human_race',
 'humanity',
 'humankind',
 'humans',
 'individual',
 'instrumentality',
 'instrumentation',
 'island',
 'land',
 'living_thing',
 'lover',
 'male',
 'male_person',
 'mammal',
 'mammalian',
 'man',
 'mankind',
 'military_man',
 'military_personnel',
 'mortal',
 'object',
 'organism',
 'person',
 'physical_entity',
 'physical_obje

In [16]:
# First synset that WordNet returns for 'boy'.
boy = wn.synsets('boy')[0]

In [17]:
# Direct hypernym synsets of that synset.
boy.hypernyms()

[Synset('male.n.02')]

In [18]:

def check_if_words_have_parent_childre_relation(word1, word2):
    """Classify the hypernym relation between two words.

    Returns:
        0 if `word1` is among the hypernym names collected for `word2`,
        1 if `word2` is among the hypernym names collected for `word1`,
        -1 if neither holds. The checks run in that order; the first match wins.
    """
    hypernyms_of_first = get_all_hypernyms(word1, level_in_depth=0)
    hypernyms_of_second = get_all_hypernyms(word2, level_in_depth=0)

    if word1 in hypernyms_of_second:
        return 0
    return 1 if word2 in hypernyms_of_first else -1
    

In [19]:
# A result of 1 means the second word is among the hypernym names of the first.
check_if_words_have_parent_childre_relation('boy', 'male')

1

In [20]:
# Same check for another word pair.
check_if_words_have_parent_childre_relation('girl', 'woman')

1

In [21]:
# A result of 0 means 'man' is among the names collected for 'men'.
# NOTE(review): 'men' is an inflection of 'man', not a child concept; this looks
# like a side effect of how the hypernym names are gathered — confirm it is intended.
check_if_words_have_parent_childre_relation('man', 'men')

0

In [22]:
# Sanity check on a noun pair outside the person vocabulary.
check_if_words_have_parent_childre_relation('apple', 'fruit')

1

In [23]:
# Inspect what the hypernym collection yields for the word 'bad'.
get_all_hypernyms('bad', level_in_depth=0)

{'abstract_entity',
 'abstraction',
 'attribute',
 'bad',
 'badly',
 'badness',
 'big',
 'defective',
 'entity',
 'forged',
 'high-risk',
 'quality',
 'regretful',
 'risky',
 'sorry',
 'speculative',
 'spoiled',
 'spoilt',
 'tough',
 'uncollectible',
 'unfit',
 'unsound'}

In [24]:
def get_entailment_graph(verb):
    """List the lemma names of everything WordNet marks as entailed by `verb`.

    Names are lower-cased and have underscores replaced by spaces *before*
    de-duplication, so names that differ only in case or separator are
    reported once. The result is sorted so its order is stable between runs.

    Args:
        verb: Word to look up.

    Returns:
        Sorted list of unique, normalised lemma names; empty when none exist.
    """
    all_entailments = {
        lemma.name().lower().replace('_', ' ')
        for syn in wn.synsets(verb)
        for entailed_verb in syn.entailments()
        for lemma in entailed_verb.lemmas()
    }
    return sorted(all_entailments)

In [25]:
# Example lookup of the entailments recorded for 'freeze'.
get_entailment_graph('freeze')

['solidify']

In [26]:
def get_word_meronyms(word):
    """List the part and substance meronym lemma names of every synset of `word`.

    Names are lower-cased and have underscores replaced by spaces *before*
    de-duplication, so names that differ only in case or separator are
    reported once. The result is sorted so its order is stable between runs.

    Args:
        word: Word to look up.

    Returns:
        Sorted list of unique, normalised lemma names; empty when none exist.
    """
    all_meronyms = set()
    for syn in wn.synsets(word):
        # Part and substance meronyms are treated alike.
        for related in (syn.part_meronyms(), syn.substance_meronyms()):
            for meronym in related:
                for lemma in meronym.lemmas():
                    all_meronyms.add(lemma.name().lower().replace('_', ' '))
    return sorted(all_meronyms)

In [28]:
def get_word_holonyms(word):
    """List the part and substance holonym lemma names of every synset of `word`.

    Names are lower-cased and have underscores replaced by spaces *before*
    de-duplication, so names that differ only in case or separator are
    reported once. The result is sorted so its order is stable between runs.

    Args:
        word: Word to look up.

    Returns:
        Sorted list of unique, normalised lemma names; empty when none exist.
    """
    all_holonyms = set()
    for syn in wn.synsets(word):
        # Part and substance holonyms are treated alike.
        for related in (syn.part_holonyms(), syn.substance_holonyms()):
            for holonym in related:
                for lemma in holonym.lemmas():
                    all_holonyms.add(lemma.name().lower().replace('_', ' '))
    return sorted(all_holonyms)

In [31]:
# Example lookup of the holonyms recorded for 'paper'.
get_word_holonyms('paper')

['page']

In [32]:
# Re-select the first container entry for inspection.
obj = sentences_with_amr_container[0]

In [33]:
# Element 0 of the entry is the sentence text.
obj[0]

'company\'s slogan "Expect More. Pay Less."'

In [34]:
# NOTE(review): `graph` is reassigned to `obj[1].graph_nx` in the following cell,
# so this binding is short-lived — confirm whether this cell is still needed.
graph = obj[1]

In [35]:
# Graph object attached to the entry.
graph = obj[1].graph_nx

In [37]:
# List the graph nodes together with their attribute dictionaries.
graph.nodes(data= True)

NodeDataView({'s': {'label': '"s/slogan"', 'shape': 'circle'}, 'c': {'label': '"c/company"', 'shape': 'circle'}, 'a': {'label': '"a/and"', 'shape': 'circle'}, 'e': {'label': '"e/expect-01"', 'shape': 'circle'}, 'm': {'label': '"m/more"', 'shape': 'circle'}, 'p': {'label': '"p/pay-01"', 'shape': 'circle'}, 'l': {'label': '"l/less"', 'shape': 'circle'}})

In [38]:
# Mapping from node label to the word it stands for.
obj[1].label2word

{'s': 'slogan',
 'c': 'company',
 'a': 'and',
 'e': 'expect',
 'm': 'more',
 'p': 'pay',
 'l': 'less'}

In [39]:
import nltk

In [61]:
# Fetch the NLTK resources needed for the universal-tagset POS tagging call
# made in the next cell.
nltk.download('averaged_perceptron_tagger')
nltk.download('universal_tagset')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /nas/home/souratih/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package universal_tagset to
[nltk_data]     /nas/home/souratih/nltk_data...
[nltk_data]   Unzipping taggers/universal_tagset.zip.


True

In [67]:
# Tag a single token and keep only the tag of the resulting (token, tag) pair.
nltk.pos_tag(['quickly'], tagset = 'universal')[0][1]

'ADV'

In [None]:
# Universal-tagset labels of interest.
# NOTE(review): not referenced by any other visible cell — confirm where this is consumed.
pos_tags = ['NOUN', 'VERB', 'ADJ', 'ADV']

In [47]:
# Node count of every graph in the container. The comprehension uses its own
# loop variable so the notebook-level `obj` bound in earlier cells is not
# silently rebound to the last container entry.
a = [entry[1].graph_nx.number_of_nodes() for entry in sentences_with_amr_container]

In [52]:
# Largest, smallest and mean node count over all graphs.
max(a), min(a), sum(a) / len(a)

(95, 2, 22.53704705246079)

In [None]:
# Removed a stray ")" that was the only content of this cell and made it a syntax error.

In [53]:
# NOTE(review): looks like n * (n - 1) / 2 for n = 22, i.e. the number of unordered
# node pairs at roughly the mean graph size computed above — confirm.
22 * 21 / 2

231.0