In [1]:
import sqlite3
from psycopg2.sql import SQL, Identifier, Literal, DEFAULT, Composed

In [2]:
entry_db = 'data/estwn_kb74/wordnet_entry.db'

In [3]:
def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by db_file
    :param db_file: database file
    :return: Connection object or None
    """
    try:
        conn = sqlite3.connect(db_file)
        return conn
    except sqlite3.Error as e:
        print("Connection error: [%s]" % e)

    return None

In [4]:
conn= create_connection('data/estwn_kb74/wordnet_entry.db')
crsr = conn.cursor()

In [5]:
conn.execute("ATTACH DATABASE 'data/estwn_kb74/wordnet_relation.db' AS wordnet_relation")

<sqlite3.Cursor at 0x7fc2a868f030>

In [27]:
def get_synset(conn,crsr,synset_id):
    
    with conn:
        crsr.execute('''SELECT literal FROM wordnet_entry WHERE id = {} ''' .format(synset_id))
        data = crsr.fetchall()
    
    return [row[0] for row in data]

In [38]:
get_synset(conn,crsr,'2')

['patustus', 'eksimine', 'libastumine', 'komistamine', 'patustamine']

In [29]:
def synsets(conn,crsr,lemma,pos=None):
    
    with conn:
        if pos == None:
            crsr.execute('''SELECT id FROM wordnet_entry WHERE literal = '{}' '''.format(lemma))
        else:
            crsr.execute('''SELECT id FROM wordnet_entry WHERE literal = '{}' AND pos = '{}' '''.format(lemma,pos))
        return [row[0] for row in crsr.fetchall()]

In [43]:
for id_data in synsets(conn,crsr,'tegema','v'):
    print(get_synset(conn,crsr,id_data))

['sooritama', 'tegema']
['tegema']
['valmistama', 'tegema']
['toimima', 'tegutsema', 'tegema']
['häält tegema', 'kõlama', 'tegema']
['tingima', 'põhjustama', 'tegema', 'tekitama']
['sünnitama', 'esile kutsuma', 'tegema', 'tekitama', 'looma']


In [30]:
def get_related_synset(conn,cursor,synset_id,relation=None):
    
    with conn:
        if relation == None:
            cursor.execute('''SELECT start_vertex,relation FROM wordnet_relation WHERE end_vertex = '{}' '''.format(synset_id))
        else:
            cursor.execute('''SELECT start_vertex,relation FROM wordnet_relation WHERE end_vertex = '{}' AND relation = '{}' '''.format(synset_id, relation))
        return [row for row in cursor.fetchall()]

In [56]:
get_related_synset(conn,crsr,83551)

[(8209, 'hyponym'),
 (23100, 'similar'),
 (83552, 'hypernym'),
 (83553, 'hypernym')]

In [14]:
class Wordnet:
    def __init__(self,version='74'):
        self.conn = None
        self.crsr = None
        self.version = version
        
        try: 
            self.conn = sqlite3.connect('data/estwn_kb{}/wordnet_entry.db'.format(self.version))
        except sqlite3.Error as e:
            print("Connection error: [%s]" % e)
            return
        
        self.crsr = self.conn.cursor()
        self.conn.execute("ATTACH DATABASE 'data/estwn_kb74/wordnet_relation.db' AS wordnet_relation")
        
    def __del__(self):
        self.conn.close()
        
    def get_synset(self, synset_id):
    
        with self.conn:
            self.crsr.execute('''SELECT literal FROM wordnet_entry WHERE id = {} ''' .format(synset_id))
            return [row[0] for row in self.crsr.fetchall()]

In [15]:
wn = Wordnet()

In [16]:
wn.get_synset(4)

['alistaja', 'võitja', 'lööja']

In [3]:
def _get_key_from_id(id,conn,cursor):
    
    with conn:
        cursor.execute('''SELECT literal, pos, sense FROM wordnet_entry WHERE id = '{}' LIMIT 1'''.format(id))
        data = cursor.fetchall()[0]
        literal = data[0]
        pos = data[1]
        sense = "%02d"%data[2]
        
        return '.'.join([literal,pos,sense])


In [4]:
'''
tee wordnet.py fail milles on Wordnet klass, mille _init_ meetod teeb kaks SQLight andmebaasi ühendust

üks wordnet_relations jaoks ja teine wordnet_entries jaoks

sul on juba olemas kaks funktsiooni mida sellel klassil vaja läheb

lisaks on sul vaja defineerida kõik wn.py olevad meetodid, mis ei alga alakriipsuga

lisaks on sul vaja defineerida _del_ funktsioon, mis paneb SQLight ühendused kinni
'''

class Synset:
    """Represents a WordNet synset.
    Attributes
    ----------
    wordnet: wordnet version
    name : str
      Synset  string identifier in the form `lemma.pos.sense_id`.
    id : int
      Synset integer identifier.
    pos : str
      Synset's part-of-speech.
    _raw_synset: eurown.Synset
      Underlying Synset object. Not intended to access directly.
    """
    
    def __init__(self,wordnet,id):
        
        self.wordnet = wordnet
        self.conn = None
        self.cursor = None
        self.name = str()
        self._raw_synset = None
        self.pos = None
        self.id = id
                  
        try: 
            self.conn = sqlite3.connect('data/estwn_kb{}/wordnet_entry.db'.format(self.wordnet))
            self.cursor = self.conn.cursor()
        except sqlite3.Error as e:
            print("Connection error: [%s]" % e)
            return
        
        self.cursor.execute('''SELECT pos FROM wordnet_entry WHERE id = '{}' LIMIT 1'''.format(id))
        self.pos = self.cursor.fetchone()[0]

        self.cursor.execute('''SELECT estwn_id FROM wordnet_entry WHERE id = '{}' LIMIT 1'''.format(id))
        self._raw_synset =  self.cursor.fetchone()[0]

        self.cursor = self.conn.cursor()
        self.conn.execute("ATTACH DATABASE 'data/estwn_kb74/wordnet_relation.db' AS wordnet_relation")

        self.name = _get_key_from_id(id,self.conn,self.cursor)
        
        
    def __eq__(self,other):
        
        return self.wordnet == other.wordnet
    
    def __del__(self):
        
        self.conn.close()
    
    def get_related_synset(self,synset_id,relation=None):
        '''Returns all relation names and start_vertex if relation not specified, else returns start_vertex of specified relation.
        Parameters
        ----------
        synset_id : int
        relation  : str
        '''   
        with self.conn:
            if relation == None:
                self.cursor.execute('''SELECT end_vertex,relation FROM wordnet_relation WHERE start_vertex = '{}' '''.format(synset_id))
                return [row for row in self.cursor.fetchall()]
            else:
                self.cursor.execute('''SELECT end_vertex FROM wordnet_relation WHERE start_vertex = '{}' AND relation = '{}' '''.format(synset_id, relation))
                return [row[0] for row in self.cursor.fetchall()]
        
    def get_synset(self, synset_id):

        with self.conn:
            self.cursor.execute('''SELECT literal FROM wordnet_entry WHERE id = {} ''' .format(synset_id))
            return [row[0] for row in self.cursor.fetchall()]
        

    def closure(self, relation, depth=float('inf')):   
        
        """Finds all the ancestors of the synset using provided relation.
        Parameters
        ----------
          relation : str
        Name of the relation which is recursively used to fetch the ancestors.
        Returns
        -------
          list of Synsets
        Returns the ancestors of the synset via given relations.
        """
    
        #TODO: return depth-first iteration. Current breath-first.
        ancestors = []
        unvisited_ancestors = [(synset,1) for synset in self.get_related_synset(self.id, relation)]
        while len(unvisited_ancestors) > 0:
            ancestor_depth = unvisited_ancestors.pop()
            if ancestor_depth[1] > depth:
                continue    
            unvisited_ancestors.extend([(synset,ancestor_depth[1]+1) for synset in self.get_related_synset(ancestor_depth[0],relation)])
            ancestors.append(ancestor_depth[0])

        return list(set(ancestors))
    
    
    def hypernyms(self, newId=None):
        """Retrieves all the hypernyms.
        
        Returns
        -------
          list of Synsets
        Synsets which are linked via hypernymy relation.
        
        """
        if newId == None:
            return self.get_related_synset(self.id,"hypernym")
        else:
            return self.get_related_synset(newId, "hypernym")
    def hyponyms(self):
        """Retrieves all the hyponyms.
        
        Returns
        -------
          list of Synsets
        Synsets which are linked via hyponymy relation.
        
        """
        
        return self.get_related_synset(self.id,"hyponym")
    
    def holonyms(self):
        """Retrieves all the holonyms.
        
        Returns
        -------
          list of Synsets
        Synsets which are linked via holonymy relation.
        
        """
        return self.get_related_synset(self.id,"holonym")

    def meronyms(self):
        """Retrieves all the meronyms.
        
        Returns
        -------
          list of Synsets
        Synsets which are linked via meronymy relation.
        
        """
        return self.get_related_synset(self.id,"meronym")

    def member_holonyms(self):
        """Retrieves all the member holoynms.
        
        Returns
        -------
          list of Synsets
        Synsets which are "wholes" of what the synset represents.
        
        """
        return self.get_related_synset(self.id,"holo_member")

    def root_hypernyms(self,hypernym=None):
        """Retrieves all the root hypernyms.
        
        Returns
        -------
          list of Synsets
        Roots via hypernymy relation.
        
        """
        #TODO: retrieve depth-first hypernyms as in commented out portion.
        
        visited = set()
        current_hypernyms = set(self.hypernyms())
        next_hypernyms = set()
        return_hypernyms = set()
        
        for sset in current_hypernyms:
            if sset in visited:
                continue
            visited.add(sset)
            next_hypernyms = set(self.hypernyms(sset.id))
            return_hypernyms |= next_hypernyms
            
            for subsset in next_hypernyms:
                if subsset in visited:
                    continue
                visited.add(subsset)
                return_hypernyms |= set(self.hypernyms(subsset))
        
        return list(return_hypernyms)
            
        '''
        visited = set()
        hypernyms_next_level = set(self.hypernyms())
        current_hypernyms = set(hypernyms_next_level)

        while len(hypernyms_next_level) > 0:
            current_hypernyms = set(hypernyms_next_level)
            hypernyms_next_level = set()
          
            for synset in current_hypernyms:
                if synset in visited:
                    continue
                visited.add(synset)
                hypernyms_next_level |= set(self.hypernyms(synset))

        return list(current_hypernyms)
        '''
    
    def get_variants(self):
        """Returns variants/lemmas of the synset.
        
        Returns
        -------
          list of eurown.Variants
        Lemmas/variants of the synset.
        
        """
        print("Not implemented.")
      
    def definition(self):
        """Returns the definition of the synset.
        
        Returns
        -------
          str
        Definition of the synset as a new-line separated concatenated string from all its variants' definitions.
        
        """
        return print("not implemented")#'\n'.join([variant.gloss for variant in self._raw_synset.variants if variant.gloss])
      

      #TODO compare 1.4 wn lemma return value with entry.db values.
    def lemmas(self):
        """Returns the synset's lemmas/variants' literal represantions.
        
        Returns
        -------
          list of Lemmas
        List of its variations' literals as Lemma objects.
        
        """
        with self.conn:
            self.cursor.execute('''SELECT literal FROM wordnet_entry WHERE id = {} ''' .format(self.id))
            return [row[0] for row in self.cursor.fetchall()]


In [5]:
sset = Synset('74',4)
sset.hypernyms()

[41779, 42966, 44234]

In [6]:
sset._raw_synset

'estwn-et-36144-n'

In [7]:
sset.closure('hypernym')

[44234, 41779, 42966]

In [8]:
sset.member_holonyms()

[]

In [17]:
sset.root_hypernyms()

44234


AttributeError: 'int' object has no attribute 'id'

In [10]:
sset.get_variants()

Not implemented.


In [11]:
sset.lemmas()

['alistaja', 'võitja', 'lööja']

In [26]:
current = set([1,3,6,8])
nextid = set([2,3])

for id in current:
    nextid |= set(id+2)
    print(id)
    for subId in nextid:
        print(subId)
        nextId.insert(0,"new" + subId)

TypeError: 'int' object is not iterable