In [1]:
from estnltk.wordnet import wn
import sqlite3

In [None]:
# Gather the synsets of every part of speech from wn into synsetList.
pos = [wn.ADJ, wn.ADV, wn.VERB, wn.NOUN]
synsetList = []
for part_of_speech in pos:
    # Everything all_synsets() produces for this part of speech is stored,
    # in iteration order.
    synsetList.extend(wn.all_synsets(part_of_speech))

In [3]:
# SQLite file that fetch_relations() is called with; its cursor is used to
# look up synset ids and synset name strings.
sset_db = 'wordnet/data/all_synsets.db'
# SQLite file that upload_relations() is called with to store relation rows.
relation_db = 'wordnet/data/all_relations_strings.db'

In [4]:
# Module-level accumulators for relation rows. fetch_relations() appends one
# entry to each list per relation and upload_relations() reads them back by
# position, so position k of every list belongs to the same row.
end_vrtx = []    # id fetched for the synset whose relations are listed
start_vrtx = []  # id fetched for the related synset
start_sset = []  # name string built for the related synset
end_sset = []    # name string built for the synset whose relations are listed
rel_type = []    # relation label such as "hyponym"

In [6]:
def create_connection(db_file):
    """Open the SQLite database stored in ``db_file``.

    A failure reported by sqlite3 is printed instead of being raised.

    :param db_file: database file
    :return: Connection object, or None when sqlite3 could not connect
    """
    connection = None
    try:
        connection = sqlite3.connect(db_file)
    except sqlite3.Error as err:
        print("Connection error: [%s]" % err)
    return connection

def create_table(conn, create_table_sql):
    """Run a CREATE TABLE statement on an open connection.

    An sqlite3 error raised by the statement is printed, not propagated.

    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return: None
    """
    try:
        conn.cursor().execute(create_table_sql)
    except sqlite3.Error as err:
        print("Connection error while creating table: [%s]" % err)

def sqlTables(databaseLoc):
    """Make sure the relation table exists in the database at ``databaseLoc``.

    The table is created as ``graph_table``, which is the name
    upload_relations() inserts into. Each row holds the id and name string of
    the start synset, the name string and id of the end synset, and the
    relation label.

    :param databaseLoc: path of the SQLite database file
    :return: None
    """
    sql_create_graph_table = ''' CREATE TABLE IF NOT EXISTS graph_table(

                                        start_vertex INT NOT NULL,
                                        start_synset TEXT NOT NULL,
                                        end_synset TEXT NOT NULL,
                                        end_vertex INT NOT NULL,
                                        relation TEXT NOT NULL
                                                    ); '''
    conn = create_connection(databaseLoc)
    if conn is None:
        print("Error! cannot create db conn.")
        return
    try:
        create_table(conn, sql_create_graph_table)
    finally:
        # The connection is only needed for the CREATE statement; release it
        # so the database file is not kept open.
        conn.close()

In [7]:
def synset_str(synset_word, cursor):
    """Build the ``word.pos.sense`` name string for ``synset_word``.

    The part of speech and sense are taken from the first row that
    ``wordnet_entries`` returns for the word.

    :param synset_word: word to look up
    :param cursor: database cursor used for the query
    :return: the word followed by ".<pos>.<sense>", or None when no row matches
    """
    cursor.execute('SELECT pos, sense FROM wordnet_entries WHERE synset_word = ?', (synset_word,))
    row = cursor.fetchone()
    if row is None:
        return None

    suffix = "." + row[0] + "." + str(row[1])
    return synset_word + suffix
    

In [8]:
def fetch_id(synset_word, cursor):
    """Look up the ``synset_id`` stored in ``synset_table`` for a word.

    :param synset_word: word to look up
    :param cursor: database cursor used for the query
    :return: synset_id of the first matching row, or None when no row matches
    """
    cursor.execute('SELECT synset_id FROM synset_table WHERE synset_word = ?', (synset_word,))
    row = cursor.fetchone()
    return None if row is None else row[0]

In [9]:
def get_literals(synset_list):
    """Flatten the variant literals of several synsets into one list.

    :param synset_list: iterable of synsets exposing ``_raw_synset.variants``
    :return: list with the ``literal`` of every variant, synset by synset
    """
    return [
        variant.literal
        for synset in synset_list
        for variant in synset._raw_synset.variants
    ]

In [10]:
def fetch_synsets(synset_db):
    '''
    Append every distinct ``synset_word`` of ``synset_table`` to the
    module-level ``synsetList``.

    Only the ``synset_word`` column is read; each appended item is a plain
    string.

    NOTE(review): fetch_relations() reads ``_raw_synset`` from the items of
    ``synsetList``, which strings do not have - confirm that this function is
    not meant to be mixed with the synsets collected from ``wn``.

    :param synset_db: path of the SQLite database file
    :return: None
    '''
    conn = create_connection(synset_db)
    if conn is None:
        # create_connection() has already printed the sqlite3 error.
        print("Error! cannot create db conn.")
        return
    try:
        cursor = conn.cursor()
        # Select all synsets with different words from the table.
        cursor.execute("SELECT DISTINCT synset_word FROM synset_table")
        synsetList.extend(row[0] for row in cursor.fetchall())
    finally:
        # "with conn" only manages the transaction, it never closes the
        # connection, so close it explicitly.
        conn.close()
    

In [11]:
def fetch_relations(db_file):
    """Collect the relations of every synset in ``synsetList``.

    For each synset and each supported relation, one row per literal of the
    related synsets is appended to the module-level accumulators:
    ``rel_type`` (label), ``start_sset`` / ``start_vrtx`` (name string and id
    of the related literal) and ``end_sset`` / ``end_vrtx`` (name string and
    id of the synset itself). Ids and name strings come from the database and
    are None when the lookup finds nothing.

    :param db_file: path of the SQLite database used for the lookups
    :return: None
    """
    # wn relation name -> label stored in rel_type
    relations = (
        ("has_hyperonym", "hyperonym"),
        ("has_hyponym", "hyponym"),
        ("has_holonym", "holonym"),
        ("has_meronym", "meronym"),
        ("has_member_holo", "member_holonym"),
    )
    conn = create_connection(db_file)
    if conn is None:
        # create_connection() has already printed the sqlite3 error.
        print("Error! cannot create db conn.")
        return

    try:
        cursor = conn.cursor()
        for sset in synsetList:
            sset_word = sset._raw_synset.firstVariant.literal
            end = fetch_id(sset_word, cursor)
            sset_str = synset_str(sset_word, cursor)
            for rel, rel_label in relations:
                rel_sset_list = sset.get_related_synsets(rel)
                if not rel_sset_list:
                    continue
                rel_word = get_literals(rel_sset_list)
                if not rel_word:
                    continue
                # Kept from the original: a multi-element result whose first
                # entry is '[' is skipped. The comparison is by value; "is not"
                # against a string literal tests object identity.
                if len(rel_word) > 1 and rel_word[0] == '[':
                    continue
                for word in rel_word:
                    rel_type.append(rel_label)
                    start_sset.append(synset_str(word, cursor))
                    end_sset.append(sset_str)
                    start_vrtx.append(fetch_id(word, cursor))
                    end_vrtx.append(end)
    finally:
        # Only reads were issued, so there is nothing to commit; just release
        # the connection.
        conn.close()

In [None]:
# Collect only the "has_member_holo" relations of every synset into freshly
# emptied accumulator lists (same row layout that fetch_relations() produces).
start_vrtx = []
end_vrtx   = []
end_sset   = []
start_sset = []
rel_type = []
db_file = 'wordnet/data/all_synsets.db'
# conn and cursor stay bound at module level so that following cells can keep
# querying with them.
conn = create_connection(db_file)
cursor = conn.cursor()
# relation_str only exists inside fetch_relations(), so the label is spelled
# out here; it is the one fetch_relations() pairs with "has_member_holo".
member_holo_label = "member_holonym"
for sset in synsetList:
    sset_word = sset._raw_synset.firstVariant.literal
    end = fetch_id(sset_word, cursor)
    sset_str = synset_str(sset_word, cursor)
    rel_sset_list = sset.get_related_synsets("has_member_holo")
    if not rel_sset_list:
        continue
    rel_word = get_literals(rel_sset_list)
    if not rel_word:
        continue
    # Kept from the original: a multi-element result whose first entry is '['
    # is skipped. Compared by value instead of identity ("is not").
    if len(rel_word) > 1 and rel_word[0] == '[':
        continue
    for word in rel_word:
        rel_type.append(member_holo_label)
        start_sset.append(synset_str(word, cursor))
        end_sset.append(sset_str)
        start_vrtx.append(fetch_id(word, cursor))
        end_vrtx.append(end)

In [78]:
# Print the position and the related synsets of every synset that has at
# least one "has_member_holo" relation. No database lookup is needed for
# this, so the cell does not depend on a cursor created elsewhere.
for i, sset in enumerate(synsetList):
    rel_sset_list = sset.get_related_synsets("has_member_holo")
    if rel_sset_list:
        print(i)
        print(rel_sset_list)

KeyboardInterrupt: 

In [23]:
# Start from empty accumulators so that rows from an earlier run are not
# mixed in, then collect the relations using the synset database.
start_vrtx, end_vrtx = [], []
start_sset, end_sset = [], []
rel_type = []

fetch_relations(sset_db)

KeyboardInterrupt: 

In [50]:
def upload_relations(db_file):
    """Store the collected relation rows in ``graph_table`` of ``db_file``.

    Rows are read position by position from the module-level lists
    ``start_vrtx``, ``start_sset``, ``end_sset``, ``end_vrtx`` and
    ``rel_type``. Only positions present in all five lists are written, so a
    collection run that was interrupted in the middle of a row does not
    break the upload. The synset name strings are passed through ``str()``.

    All rows are inserted in a single transaction: either every row is
    stored or, if sqlite3 raises (for example on a NOT NULL violation), none
    is.

    :param db_file: path of the SQLite database file
    :return: None
    """
    sqlTables(db_file)
    conn = create_connection(db_file)
    if conn is None:
        # create_connection() has already printed the sqlite3 error.
        print("Error! cannot create db conn.")
        return

    rows = [
        (start_id, str(start_word), str(end_word), end_id, relation)
        for start_id, start_word, end_word, end_id, relation
        in zip(start_vrtx, start_sset, end_sset, end_vrtx, rel_type)
    ]
    try:
        # "with conn" commits once on success and rolls back on an exception,
        # which replaces the per-row commit.
        with conn:
            conn.cursor().executemany(
                "INSERT INTO graph_table(start_vertex, start_synset, end_synset, end_vertex, relation) VALUES(?,?,?,?,?)",
                rows,
            )
    finally:
        conn.close()

In [None]:
# Author's note: first holonym @ index 11186, meronym @ 11184, hyponym @ 123
# NOTE(review): presumably these are positions in synsetList - confirm.
# Write the rows held in the module-level relation lists to the relations database.
upload_relations(relation_db)

In [142]:
def synset_name(raw_synset):
    """Return the ``literal.pos.NN`` name of a raw synset.

    The literal and sense come from the first variant; the sense is rendered
    as an integer zero-padded to at least two digits.

    :param raw_synset: object exposing ``pos`` and ``variants``
    :return: name string such as "word.n.01"
    """
    part_of_speech = raw_synset.pos
    head_literal = raw_synset.variants[0].literal
    padded_sense = "%02d" % raw_synset.variants[0].sense
    name_parts = [head_literal, part_of_speech, padded_sense]
    return '.'.join(name_parts)

In [24]:
# Try synset_name() on the first collected synset and show its part of speech.
# synset_name() must already be defined in the kernel when this cell runs;
# the recorded output of this cell is a NameError for that name.
sset = synsetList[0]
sset_string = synset_name(sset._raw_synset)
print(sset_string)
print(sset._raw_synset.pos)
#sset_li = get_literals(synsetList[0])


NameError: name 'synset_name' is not defined

In [None]:
def synset_name(raw_synset):
    """Compose ``literal.pos.NN`` from the first variant of a raw synset.

    NOTE: the notebook defines synset_name in two cells with the same
    behaviour; whichever cell ran last is the definition in effect.

    :param raw_synset: object exposing ``pos`` and ``variants``
    :return: the first variant's literal, the part of speech and the sense
             (integer, zero-padded to at least two digits) joined by dots
    """
    pos_tag = raw_synset.pos
    word = raw_synset.variants[0].literal
    sense_text = "%02d" % raw_synset.variants[0].sense
    return '.'.join((word, pos_tag, sense_text))

In [7]:
# Show how many synsets "has_hyponym" returns for the synset at position 122
# of synsetList (the cell's displayed value).
sset = synsetList[122]
len(sset.get_related_synsets("has_hyponym"))

45

In [None]:
# For every synset with at least one "has_hyperonym" relation, print the first
# related synset and the literal of that synset's first variant.
hyperList = []  # NOTE(review): nothing is appended to this list in this cell
for sset in synsetList:
    # Query the relation once per synset and reuse the result.
    hyperonyms = sset.get_related_synsets("has_hyperonym")
    if not hyperonyms:
        continue
    first_hyperonym = hyperonyms[0]
    print(first_hyperonym)
    print(first_hyperonym._raw_synset.variants[0].literal)

In [None]:
# Walk the internal links of every collected synset and replace each link's
# target_concept with the raw synset of the synset that wn returns for it.
# The link objects are modified in place.
'''cycle for relations - hyponym,hypernym, etc.'''   
for sset in synsetList:
        rsset = sset._raw_synset
        #print(sset)
        for relation_candidate in rsset.internalLinks:
            # The key for the lookup is derived from the current target through
            # wn._get_key_from_raw_synset; the leading underscore suggests this
            # is not public API - NOTE(review): confirm it is safe to rely on.
            linked_synset = wn.synset(wn._get_key_from_raw_synset(relation_candidate.target_concept))
            relation_candidate.target_concept = linked_synset._raw_synset
            #print(relation_candidate.target_concept)
            #print(linked_synset)
            #print(relation_candidate.name)            
            # The string literal below is disabled scratch code: it is a bare
            # expression whose value is discarded on every iteration.
            '''
            sset_literal    =relation_candidate.target_concept.variants[0].literal
            sset_sense      =relation_candidate.target_concept.variants[0].sense
            sset_pos        =rsset.pos
            #[nimi.literal for nimi in rsset.variants ]
            '''