Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 66 additions & 7 deletions udapi/block/msf/phrase.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,31 @@ def process_node(self, node):
'animacy':'PhraseAnimacy',
'ords':'Phrase'
}

# Maps the lemma of a negative particle to the lemmas of the children that can
# be attached to it with the 'fixed' relation. Such combinations are fixed
# expressions (like "never", etc.), so these negative particles must not be
# included in the phrase.
negation_fixed = {
    'ні': ['раз'],                  # Belarusian
    'ня': ['толькі'],               # Belarusian
    'nic': ['naposledku'],          # Upper Sorbian
    'nie': ['mało'],                # Polish
    'néma': ['kak'],                # Pomak
    'ne': ['le'],                   # Slovenian
    'не': ['то', 'токмо'],          # Russian and Old East Slavic
    'ни': ['в', 'раз', 'шатко'],    # Russian and Old East Slavic
    'нет': ['нет'],                 # Russian and Old East Slavic
}

def write_node_info(self, node,
tense = None,
Expand All @@ -51,12 +76,46 @@ def write_node_info(self, node,
if val != None:
node.misc[self.dictionary[key]] = val

def get_polarity(self, node, neg):
    """
    Returns the node's own Polarity feature when it is not empty.
    Otherwise returns 'Neg' if the neg list contains at least one item, and None if it is empty.
    """
    own_polarity = node.feats['Polarity']
    if own_polarity != "":
        return own_polarity

    # the node itself carries no polarity, so the decision depends only on whether neg is empty
    return 'Neg' if len(neg) > 0 else None
def has_fixed_children(self, node):
    """
    Returns True if the node has a child with the 'fixed' relation and the lemma of the first such child is listed in self.negation_fixed under the node's own lemma.
    Returns False otherwise, including when the node has no 'fixed' children or its lemma is not a key of self.negation_fixed.
    """
    fixed = [child for child in node.children if child.udeprel == 'fixed']
    if not fixed:
        return False

    # only the first 'fixed' child is compared with the lemmas allowed for this particle
    allowed_lemmas = self.negation_fixed.get(node.lemma, [])
    return fixed[0].lemma in allowed_lemmas

def get_polarity(self, nodes):
    """
    Returns 'Neg' when exactly one of the given nodes has Polarity='Neg'.
    Returns an empty string when no node or more than one node has it, so that the PhrasePolarity attribute is not generated.
    """
    negative_total = sum(1 for candidate in nodes if candidate.feats['Polarity'] == 'Neg')

    # zero negations and multiple negations are treated alike: both yield an empty string
    return 'Neg' if negative_total == 1 else ''

def get_negative_particles(self, nodes):
    """
    Returns a list of the negative particles found among the children of the given nodes.
    A child is taken when it is a PART with Polarity='Neg' attached with the 'advmod' relation,
    unless self.has_fixed_children() returns True for it.
    The particles are listed parent by parent, in the order of the given nodes.
    """
    collected = []
    for parent in nodes:
        collected.extend(
            child
            for child in parent.children
            if child.upos == 'PART'
            and child.feats['Polarity'] == 'Neg'
            and child.udeprel == 'advmod'
            and not self.has_fixed_children(child)
        )
    return collected


def get_is_reflex(self,node,refl):
if node.feats['Voice'] == 'Mid':
Expand All @@ -75,4 +134,4 @@ def get_voice(self,node,refl):
if self.is_expl_pass(refl):
return 'Pass'
return voice

25 changes: 16 additions & 9 deletions udapi/block/msf/slavic/conditional.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@ def process_node(self, node):
# the conditional mood can be formed using an auxiliary verb or certain conjunctions (such as 'aby', 'kdyby', ... in Czech),
# so x.udeprel == 'aux' cannot be required, because that condition would not match the conjunctions

if len(aux_cnd) > 0 and len(cop) == 0:
if aux_cnd and not cop:
aux = [x for x in node.children if x.udeprel == 'aux' or x.feats['Mood'] == 'Cnd'] # all auxiliary verbs and conjuctions with feats['Mood'] == 'Cnd'
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']

phrase_nodes = [node] + aux + refl

phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in refl] + [x.ord for x in neg]
neg = self.get_negative_particles(phrase_nodes)
phrase_nodes += neg

phrase_ords = [x.ord for x in phrase_nodes]
phrase_ords.sort()

auxVerb = aux_cnd[0]
Expand All @@ -41,7 +45,7 @@ def process_node(self, node):
form='Fin',
aspect=node.feats['Aspect'],
reflex=self.get_is_reflex(node,refl),
polarity=self.get_polarity(node,neg),
polarity=self.get_polarity(phrase_nodes),
voice=self.get_voice(node, refl),
ords=phrase_ords,
gender=node.feats['Gender'],
Expand All @@ -53,15 +57,18 @@ def process_node(self, node):
cop = [x for x in node.children if x.udeprel == 'cop' and (x.feats['VerbForm'] == 'Part' or x.feats['VerbForm'] == 'Fin')]
aux_cnd = [x for x in node.children if x.feats['Mood'] == 'Cnd' or x.deprel=='aux:cnd']

if len(cop) > 0 and len(aux_cnd) > 0:
if cop and aux_cnd:
# a copula can itself carry Mood='Cnd' (i.e. in Old East Slavonic); to avoid counting such a copula in phrase_ords twice, the aux list excludes nodes with x.udeprel == 'cop'
aux = [x for x in node.children if (x.udeprel == 'aux' or x.feats['Mood'] == 'Cnd') and x.udeprel != 'cop']
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
prep = [x for x in node.children if x.upos == 'ADP']
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']

phrase_nodes = [node] + aux + prep + refl + cop
neg = self.get_negative_particles(phrase_nodes)
phrase_nodes += neg

copVerb = cop[0]
phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in cop] + [x.ord for x in neg] + [x.ord for x in prep] + [x.ord for x in refl]
phrase_ords = [x.ord for x in phrase_nodes]
phrase_ords.sort()
self.write_node_info(node,
aspect=copVerb.feats['Aspect'],
Expand All @@ -70,9 +77,9 @@ def process_node(self, node):
mood='Cnd',
form='Fin',
voice=self.get_voice(copVerb, refl),
polarity=self.get_polarity(copVerb,neg),
polarity=self.get_polarity(phrase_nodes),
reflex=self.get_is_reflex(node, refl),
ords=phrase_ords,
gender=copVerb.feats['Gender'],
animacy=copVerb.feats['Animacy']
)
)
33 changes: 21 additions & 12 deletions udapi/block/msf/slavic/converb.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@ def process_node(self, node):
# condition node.upos == 'VERB' to prevent copulas from entering this branch
if node.feats['VerbForm'] == 'Conv' and node.upos == 'VERB':
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']

phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg]

phrase_nodes = [node] + refl
neg = self.get_negative_particles(phrase_nodes)
phrase_nodes += neg

phrase_ords = [x.ord for x in phrase_nodes]
phrase_ords.sort()

self.write_node_info(node,
Expand All @@ -23,7 +26,7 @@ def process_node(self, node):
form='Conv',
tense=node.feats['Tense'],
aspect=node.feats['Aspect'],
polarity=self.get_polarity(node,neg),
polarity=self.get_polarity(phrase_nodes),
reflex=self.get_is_reflex(node,refl),
ords=phrase_ords,
gender=node.feats['Gender'],
Expand All @@ -35,10 +38,13 @@ def process_node(self, node):
elif node.upos == 'ADJ':
aux = [x for x in node.children if x.udeprel == 'aux' and x.feats['VerbForm'] == 'Conv']

if len(aux) > 0:
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
if aux:
auxVerb = aux[0]
phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in neg]

phrase_nodes = [node] + aux
neg = self.get_negative_particles(phrase_nodes)
phrase_nodes += neg
phrase_ords = [x.ord for x in phrase_nodes]
phrase_ords.sort()

self.write_node_info(node,
Expand All @@ -47,7 +53,7 @@ def process_node(self, node):
form='Conv',
tense=auxVerb.feats['Tense'],
aspect=node.feats['Aspect'],
polarity=self.get_polarity(auxVerb,neg),
polarity=self.get_polarity(phrase_nodes),
ords=phrase_ords,
gender=auxVerb.feats['Gender'],
animacy=auxVerb.feats['Animacy'],
Expand All @@ -58,13 +64,16 @@ def process_node(self, node):
else:
cop = [x for x in node.children if x.udeprel == 'cop' and x.feats['VerbForm'] == 'Conv']

if len(cop) > 0:
if cop:
prep = [x for x in node.children if x.upos == 'ADP']
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']

copVerb = cop[0]
phrase_ords = [node.ord] + [x.ord for x in cop] + [x.ord for x in prep] + [x.ord for x in neg] + [x.ord for x in refl]

phrase_nodes = [node] + cop + prep + refl
neg = self.get_negative_particles(phrase_nodes)
phrase_nodes += neg
phrase_ords = [x.ord for x in phrase_nodes]
phrase_ords.sort()


Expand All @@ -76,7 +85,7 @@ def process_node(self, node):
gender=copVerb.feats['Gender'],
animacy=copVerb.feats['Animacy'],
form='Conv',
polarity=self.get_polarity(node,neg),
polarity=self.get_polarity(phrase_nodes),
ords=phrase_ords,
voice=self.get_voice(copVerb, refl)
)
Loading