Skip to content

Recursively searching for non-empty ancestors of enhanced dependencies with empty parents #123

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 55 additions & 5 deletions udapi/block/corefud/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,69 @@ def __init__(self, empty=False, **kwargs):
super().__init__(**kwargs)
self.empty = empty

def is_root_reachable_by_deps(self, node, parents_to_ignore=None):
    """Check whether the root is reachable from `node` through its deps.

    The search walks upwards along the `parent` entries of each node's
    `deps`, never stepping onto a parent listed in `parents_to_ignore`
    (i.e. it answers whether the root would still be reachable after
    those parents were deleted).

    Args:
        node: the node to start from; must provide `root` and `deps`,
            where `deps` is an iterable of dicts with a 'parent' key.
        parents_to_ignore: container of nodes that must not be traversed.
            `None` (the default) means that no parent is ignored.

    Returns:
        True if the root can be reached, False otherwise.
    """
    # With the default None the membership test below would raise
    # TypeError, so treat it as "nothing to ignore".
    if parents_to_ignore is None:
        parents_to_ignore = ()
    root = node.root
    # Nodes whose parents have already been pushed. Reachability does not
    # depend on the path taken, so each node needs to be expanded only once;
    # this also guarantees termination on cyclic deps.
    visited = []
    stack = [node]
    while stack:
        proc_node = stack.pop()
        # root is reachable
        if proc_node == root:
            return True
        # already expanded (possibly through a cycle), nothing new here
        if proc_node in visited:
            continue
        visited.append(proc_node)
        for dep in proc_node.deps:
            # the root cannot be reached through ignored nodes
            if dep['parent'] in parents_to_ignore:
                continue
            stack.append(dep['parent'])
    # every branch was exhausted without hitting the root
    return False

def _deps_ignore_nodes(self, node, parents_to_ignore):
    """Collect the deps of `node`, looking through ignored parents.

    A dep whose parent is not in `parents_to_ignore` is kept as is. A dep
    whose parent is ignored is replaced by the deps of that parent, applied
    transitively. If following ignored parents leads back to a node that
    was already passed on the same chain, a `root` dep attached to
    `node.root` is emitted for that chain instead.

    Returns a list of dep dicts (each with 'parent' and 'deprel' keys);
    the list may contain duplicates.
    """
    collected = []
    # each entry: (node to expand, nodes already passed on the way to it)
    pending = [(node, [])]
    while pending:
        current, trail = pending.pop()
        if current in trail:
            # cycle made of skipped nodes: ground this chain to the root
            collected.append({'parent': node.root, 'deprel': 'root'})
            continue
        extended_trail = trail + [current]
        for edge in current.deps:
            head = edge['parent']
            if head in parents_to_ignore:
                # look through the ignored parent later
                pending.append((head, extended_trail))
            else:
                # parent survives, so the dep can be kept unchanged
                collected.append(edge)
    return collected

def process_document(self, doc):
# This block should work both with coreference loaded (deserialized) and not.
doc._eid_to_entity = None
for root in doc.trees:
if self.empty:
root.empty_nodes = []
for node in root.descendants:
if node.raw_deps != '_':
node.raw_deps = '|'.join(d for d in node.raw_deps.split('|') if not '.' in d)
if node.raw_deps == '':
node.raw_deps = '0:root'
# process only the nodes dependent on empty nodes
if not '.' in node.raw_deps:
continue
# just remove empty parents if the root remains reachable
if self.is_root_reachable_by_deps(node, root.empty_nodes):
node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes]
# otherwise propagate to non-empty ancestors
else:
newdeps = self._deps_ignore_nodes(node, root.empty_nodes)
newdeps_sorted = sorted(set((dep['parent'].ord, dep['deprel']) for dep in newdeps))
node.raw_deps = '|'.join(f"{p}:{r}" for p, r in newdeps_sorted)

if '.' in node.misc['Functor'].split(':')[0]:
del node.misc['Functor']
root.empty_nodes = []

for node in root.descendants + root.empty_nodes:
node._mentions = []
Expand Down