Skip to content

Feature/issue 10 eparents echildren #25

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 19, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions bin/udapy
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,24 @@ import argparse

from udapi.core.run import Run


argparser = argparse.ArgumentParser(description='udapy - Python interface to Udapi - API for Universal Dependencies')
argparser.add_argument("-q", "--quiet", action="store_true", help="Warning, info and debug messages are suppressed. Only fatal errors are reported.")
argparser.add_argument("-v", "--verbose", action="store_true", help="Warning, info and debug messages are printed to the STDERR.")
argparser.add_argument('scenario', nargs=argparse.REMAINDER, help="A sequence of blocks and their parameters.")
args = argparser.parse_args()

# Initialize logging.
if args.quiet:
logging.basicConfig(format='%(asctime)-15s [%(levelname)7s] %(funcName)s - %(message)s', level=logging.CRITICAL)

# Set the level of logs according to parameters.
if args.verbose:
level = logging.DEBUG
elif args.quiet:
level = logging.CRITICAL
else:
logging.basicConfig(format='%(asctime)-15s [%(levelname)7s] %(funcName)s - %(message)s', level=logging.INFO)
level = logging.INFO

logging.basicConfig(format='%(asctime)-15s [%(levelname)7s] %(funcName)s - %(message)s', level=level)


# Process and provide the scenario.
if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion udapi/block/read/conllu.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def __init__(self, args=None):
self.file_handler = bz2.BZ2File(self.filename)
else:
logging.info('Opening regular file %s', self.filename)
self.file_handler = open(self.filename, 'r')
self.file_handler = open(self.filename, 'rb')
else:
raise ValueError('No file to process')

Expand Down Expand Up @@ -165,6 +165,8 @@ def process_document(self, document):
for (n_attribute, attribute_name) in enumerate(self.node_attributes):
if attribute_name == 'feats':
attribute_name = 'raw_feats'
if attribute_name == 'deps':
attribute_name = 'raw_deps'
setattr(node, attribute_name, raw_node_attributes[n_attribute])

nodes.append(node)
Expand Down
2 changes: 1 addition & 1 deletion udapi/block/read/reducedconllu.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import codecs
import re

from conllu import Conllu
from udapi.block.read.conllu import Conllu


class ReducedConllu(Conllu):
Expand Down
2 changes: 1 addition & 1 deletion udapi/block/zellig_harris/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def print_triple(self, target_node, context_node, relation_name):
context_word = self.get_word(context_node)

triple = '%s %s_%s' % (target_word, context_word, relation_name)
print triple.encode('utf-8')
print(triple.encode('utf-8'))

def process_node(self, node):
"""
Expand Down
2 changes: 1 addition & 1 deletion udapi/block/zellig_harris/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ def print_triple(node_a, relation_name, node_b, print_lemma=False):
node_a = get_node_representation(node_a, print_lemma=print_lemma)
node_b = get_node_representation(node_b, print_lemma=print_lemma)

print "%s %s_%s" % (node_a, relation_name, node_b)
print("%s %s_%s", node_a, relation_name, node_b)
4 changes: 2 additions & 2 deletions udapi/block/zellig_harris/csnouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

from udapi.core.block import Block

from common import *
from queries import *
from udapi.block.zellig_harris.common import *
from udapi.block.zellig_harris.queries import *


class CsNouns(Block):
Expand Down
4 changes: 2 additions & 2 deletions udapi/block/zellig_harris/csverbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

from udapi.core.block import Block

from common import *
from queries import *
from udapi.block.zellig_harris.common import *
from udapi.block.zellig_harris.queries import *


class CsVerbs(Block):
Expand Down
4 changes: 2 additions & 2 deletions udapi/block/zellig_harris/ennouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

from udapi.core.block import Block

from common import *
from queries import *
from udapi.block.zellig_harris.common import *
from udapi.block.zellig_harris.queries import *


class EnNouns(Block):
Expand Down
4 changes: 2 additions & 2 deletions udapi/block/zellig_harris/enverbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

from udapi.core.block import Block

from common import *
from queries import *
from udapi.block.zellig_harris.common import *
from udapi.block.zellig_harris.queries import *


class EnVerbs(Block):
Expand Down
181 changes: 87 additions & 94 deletions udapi/core/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,44 +29,44 @@ class Node(object):
Class for representing non-root nodes in Universal Dependency trees.

"""

__slots__ = list()

# (A) Features following the CoNLL-U documentation:
__slots__.append('_ord') # Word index, integer starting at 1 for each new sentence.
__slots__.append('_form') # Word form or punctuation symbol.
__slots__.append('_lemma') # Lemma or stem of word form.
__slots__.append('_upostag') # Universal POS tag drawn from our revised version of the Google UPOS tags.
__slots__.append('_xpostag') # Language-specific part-of-speech tag; underscore if not available.
__slots__.append('_raw_feats') # List of morphological features.
__slots__.append('_head') # Head of the current token, which is either a value of ID or zero (0).
__slots__.append('_deprel') # Universal Stanford dependency relation to the HEAD (root iff HEAD = 0).
__slots__.append('_deps') # List of secondary dependencies (head-deprel pairs).
__slots__.append('_misc') # Any other annotation.

# (B) Udapi-specific extra features:
__slots__.append('_feats') # A serialization of the morphological features, as they appear in conllu files.
__slots__.append('_parent') # Parent node.
__slots__.append('_children') # Ord-ordered list of child nodes.
__slots__.append('_aux') # Other technical attributes.
__slots__ = [
'ord', # Word index, integer starting at 1 for each new sentence.
'form', # Word form or punctuation symbol.
'lemma', # Lemma or stem of word form.
'upostag', # Universal POS tag drawn from our revised version of the Google UPOS tags.
'xpostag', # Language-specific part-of-speech tag; underscore if not available.
'head', # Head of the current token, which is either a value of ID or zero (0).
'deprel', # Universal Stanford dependency relation to the HEAD (root iff HEAD = 0).
'misc', # Any other annotation.

'_raw_deps', # Secondary dependencies (head-deprel pairs) in their original CoNLLU format.
'_deps', # Deserialized secondary dependencies in a list of {parent, deprel} dicts.
'_raw_feats', # Morphological features in their original CoNLLU format.
'_feats', # Deserialized morphological features stored in a dict (feature -> value).
'_parent', # Parent node.
'_children', # Ord-ordered list of child nodes.
'_aux' # Other technical attributes.
]

def __init__(self, data=None):
if data is None:
data = dict()

# Initialization of the (A) list.
self._ord = 0
self._form = '_'
self._lemma = '_'
self._upostag = '_'
self._xpostag = '_'
self._raw_feats = '_'
self._head = '_'
self._deprel = '_'
self._deps = '_'
self._misc = '_'
# setattr(self, 'ord', 0)
# self.ord = 0
# self.form = '_'
# self.lemma = '_'
# self.upostag = '_'
# self.xpostag = '_'
# self.head = '_'
# self.deprel = '_'
# self.misc = '_'

# Initialization of the (B) list.
self._raw_deps = '_'
self._deps = None
self._raw_feats = '_'
self._feats = None
self._parent = None
self._children = list()
Expand All @@ -83,47 +83,10 @@ def __str__(self):
:return: A pretty textual description of the Node.

"""
return "<%d, %s, %d, %s>" % (self.ord, self.form, self.parent.ord, self.deprel)

@property
def ord(self):
return self._ord

@ord.setter
def ord(self, value):
self._ord = int(value)

@property
def form(self):
return self._form

@form.setter
def form(self, value):
self._form = value

@property
def lemma(self):
return self._lemma

@lemma.setter
def lemma(self, value):
self._lemma = value

@property
def upostag(self):
return self._upostag

@upostag.setter
def upostag(self, value):
self._upostag = value

@property
def xpostag(self):
return self._xpostag

@xpostag.setter
def xpostag(self, value):
self._xpostag = value
parent_ord = None
if self.parent is not None:
parent_ord = self.parent.ord
return "<%d, %s, %s, %s>" % (self.ord, self.form, parent_ord, self.deprel)

@property
def raw_feats(self):
Expand Down Expand Up @@ -157,36 +120,35 @@ def raw_feats(self, value):
self._feats = None

@property
def head(self):
return self._head
def raw_deps(self):
"""
After the access to the raw secondary dependencies,
provide the serialization if they were deserialized already.

@head.setter
def head(self, value):
self._head = int(value)
:return: A raw string with secondary dependencies, as stored in the CoNLLU files.
:rtype: str

@property
def deprel(self):
return self._deprel
"""
if self._deps is not None:
serialized_deps = []
for secondary_dependence in self._deps:
serialized_deps.append('%d:%s' % (secondary_dependence['parent'].ord, secondary_dependence['deprel']))

@deprel.setter
def deprel(self, value):
self._deprel = value
serialized_deps = '|'.join(serialized_deps)
self._raw_deps = serialized_deps

@property
def deps(self):
return self._deps
return self._raw_deps

@deps.setter
def deps(self, value):
self._deps = value
@raw_deps.setter
def raw_deps(self, value):
"""
When updating raw secondary dependencies, delete the current version of the deserialized data.

@property
def misc(self):
return self._misc
:param value: A new raw secondary dependencies.

@misc.setter
def misc(self, value):
self._misc = value
"""
self._raw_deps = str(value)
self._deps = None

@property
def feats(self):
Expand All @@ -210,6 +172,37 @@ def feats(self):
def feats(self, value):
self._feats = value

@property
def deps(self):
    """
    Return the secondary dependencies, deserializing the raw data on first access.

    The deserialized list is cached in ``self._deps`` and returned directly
    on later accesses. Each item is a dict with the keys ``'parent'``
    (the node found at the head index) and ``'deprel'`` (the relation label).

    :return: A list with secondary dependencies.
    :rtype: list
    :raises ValueError: If an item of the raw data has no colon or a non-integer head.
    :raises IndexError: If a head index does not address any node of the tree.

    """
    if self._deps is None:
        # An underscore stands for "no secondary dependencies", so there is
        # no need to collect the nodes of the tree at all.
        if self._raw_deps == '_':
            self._deps = list()
            return self._deps

        # Obtain a list of all nodes in the dependency tree; heads index into it.
        nodes = [self.root] + self.root.descendants()

        # Build into a local list so that a failure in the middle of parsing
        # does not leave a partially filled cache behind.
        parsed_deps = list()
        for raw_dependency in self._raw_deps.split('|'):
            # Split on the first colon only: the relation label may contain
            # colons itself (e.g. "2:nmod:poss").
            head, deprel = raw_dependency.split(':', 1)
            parent = nodes[int(head)]
            parsed_deps.append({'parent': parent, 'deprel': deprel})

        self._deps = parsed_deps

    return self._deps


@deps.setter
def deps(self, value):
    """
    Replace the deserialized secondary dependencies.

    :param value: The new value stored as the deserialized secondary dependencies.

    """
    self._deps = value

@property
def parent(self):
return self._parent
Expand Down
4 changes: 2 additions & 2 deletions udapi/core/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging

from document import Document
from udapi.core.document import Document


def _parse_block_name(block_name):
Expand Down Expand Up @@ -86,7 +86,7 @@ def _import_blocks(block_names, block_args):
try:
command = "from " + module + " import " + class_name + " as b" + str(block_id)
logging.debug("Trying to run command: %s", command)
exec command
exec(command)
except:
raise RuntimeError("Error when trying import the block %s", block_name)

Expand Down
8 changes: 8 additions & 0 deletions udapi/core/tests/data/secondary_dependencies.conllu
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# sent_id a-mf920901-001-p1s1A
# orig_file_sentence mf920901_001#1
1 Slovenská slovenský ADJ AAFS1----1A---- Case=Nom|Degree=Pos|Gender=Fem|Negative=Pos|Number=Sing 2 amod 0:root|2:amod _
2 ústava ústava NOUN NNFS1-----A---- Case=Nom|Gender=Fem|Negative=Pos|Number=Sing 0 root 0:root SpaceAfter=No
3 : : PUNCT Z:------------- _ 2 punct 0:root _
4 pro pro ADP RR--4---------- AdpType=Prep|Case=Acc 2 appos 0:root LId=pro-1
5 i i CONJ J^------------- _ 4 cc 1:amod LId=i-1
6 proti proti ADP RR--3---------- AdpType=Prep|Case=Dat 4 conj 5:conj LId=proti-1
Loading