diff --git a/udapi/block/msf/phrase.py b/udapi/block/msf/phrase.py index c92e5e91..e39fc131 100644 --- a/udapi/block/msf/phrase.py +++ b/udapi/block/msf/phrase.py @@ -30,6 +30,31 @@ def process_node(self, node): 'animacy':'PhraseAnimacy', 'ords':'Phrase' } + + # a dictionary where the key is the lemma of a negative particle and the value is a list of the lemmas of their possible children that have a 'fixed' relation + # we do not want to include these negative particles in the phrase; these are expressions like "never", etc. + negation_fixed = { + # Belarusian + 'ні' : ['раз'], + 'ня' : ['толькі'], + + # Upper Sorbian + 'nic' : ['naposledku'], + + # Polish + 'nie' : ['mało'], + + # Pomak + 'néma' : ['kak'], + + # Slovenian + 'ne' : ['le'], + + # Russian and Old East Slavic + 'не' : ['то', 'токмо'], + 'ни' : ['в', 'раз', 'шатко'], + 'нет' : ['нет'] + } def write_node_info(self, node, tense = None, @@ -51,12 +76,46 @@ def write_node_info(self, node, if val != None: node.misc[self.dictionary[key]] = val - def get_polarity(self, node, neg): - if node.feats['Polarity'] != "": - return node.feats['Polarity'] - if len(neg) == 0: - return None - return 'Neg' + def has_fixed_children(self, node): + """ + Returns True if the node has any children with the 'fixed' relation and the node's lemma along with the child's lemma are listed in self.negation_fixed. + """ + fixed_children = [x for x in node.children if x.udeprel == 'fixed'] + + if fixed_children: + if fixed_children[0].lemma in self.negation_fixed.get(node.lemma, []): + return True + return False + + def get_polarity(self, nodes): + """ + Returns 'Neg' if there is exactly one node with Polarity='Neg' among the given nodes. + Returns an empty string if there are zero or more than one such nodes. 
+ """ + neg_count = 0 + for node in nodes: + if node.feats['Polarity'] == 'Neg': + neg_count += 1 + + if neg_count == 1: + return 'Neg' + + # neg_count is either zero or greater than one; in both cases we want to return an empty string so that the PhrasePolarity attribute is not generated + else: + return '' + + def get_negative_particles(self, nodes): + """ + Returns a list of all negative particles found among the children + of the specified nodes, except for negative particles with fixed children specified in self.negation_fixed. + """ + neg_particles = [] + for node in nodes: + neg = [x for x in node.children if x.upos == 'PART' and x.feats['Polarity'] == 'Neg' and x.udeprel == 'advmod' and not self.has_fixed_children(x)] + if neg: + neg_particles += neg + return neg_particles + def get_is_reflex(self,node,refl): if node.feats['Voice'] == 'Mid': @@ -75,4 +134,4 @@ def get_voice(self,node,refl): if self.is_expl_pass(refl): return 'Pass' return voice - + diff --git a/udapi/block/msf/slavic/conditional.py b/udapi/block/msf/slavic/conditional.py index f5206519..89eafd6c 100644 --- a/udapi/block/msf/slavic/conditional.py +++ b/udapi/block/msf/slavic/conditional.py @@ -19,12 +19,16 @@ def process_node(self, node): # the conditional mood can be formed using the auxiliary verb or some conjunctions (such as 'aby, kdyby...' 
in Czech) # so x.udeprel == 'aux' can't be required because it doesn't meet the conjunctions - if len(aux_cnd) > 0 and len(cop) == 0: + if aux_cnd and not cop: aux = [x for x in node.children if x.udeprel == 'aux' or x.feats['Mood'] == 'Cnd'] # all auxiliary verbs and conjuctions with feats['Mood'] == 'Cnd' refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] + + phrase_nodes = [node] + aux + refl - phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in refl] + [x.ord for x in neg] + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() auxVerb = aux_cnd[0] @@ -41,7 +45,7 @@ def process_node(self, node): form='Fin', aspect=node.feats['Aspect'], reflex=self.get_is_reflex(node,refl), - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), voice=self.get_voice(node, refl), ords=phrase_ords, gender=node.feats['Gender'], @@ -53,15 +57,18 @@ def process_node(self, node): cop = [x for x in node.children if x.udeprel == 'cop' and (x.feats['VerbForm'] == 'Part' or x.feats['VerbForm'] == 'Fin')] aux_cnd = [x for x in node.children if x.feats['Mood'] == 'Cnd' or x.deprel=='aux:cnd'] - if len(cop) > 0 and len(aux_cnd) > 0: + if cop and aux_cnd: # there can be a copula with Mood='Cnd' (i. e. 
in Old East Slavonic), we don't want to count these copula in phrase_ords twice, so there is x.udeprel != 'cop' in aux list aux = [x for x in node.children if (x.udeprel == 'aux' or x.feats['Mood'] == 'Cnd') and x.udeprel != 'cop'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] prep = [x for x in node.children if x.upos == 'ADP'] refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] + phrase_nodes = [node] + aux + prep + refl + cop + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + copVerb = cop[0] - phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in cop] + [x.ord for x in neg] + [x.ord for x in prep] + [x.ord for x in refl] + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, aspect=copVerb.feats['Aspect'], @@ -70,9 +77,9 @@ def process_node(self, node): mood='Cnd', form='Fin', voice=self.get_voice(copVerb, refl), - polarity=self.get_polarity(copVerb,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node, refl), ords=phrase_ords, gender=copVerb.feats['Gender'], animacy=copVerb.feats['Animacy'] - ) + ) \ No newline at end of file diff --git a/udapi/block/msf/slavic/converb.py b/udapi/block/msf/slavic/converb.py index e517a5c8..6b725d56 100644 --- a/udapi/block/msf/slavic/converb.py +++ b/udapi/block/msf/slavic/converb.py @@ -12,9 +12,12 @@ def process_node(self, node): # condition node.upos == 'VERB' to prevent copulas from entering this branch if node.feats['VerbForm'] == 'Conv' and node.upos == 'VERB': refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] - - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] + + phrase_nodes = [node] + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in 
phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -23,7 +26,7 @@ def process_node(self, node): form='Conv', tense=node.feats['Tense'], aspect=node.feats['Aspect'], - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), ords=phrase_ords, gender=node.feats['Gender'], @@ -35,10 +38,13 @@ def process_node(self, node): elif node.upos == 'ADJ': aux = [x for x in node.children if x.udeprel == 'aux' and x.feats['VerbForm'] == 'Conv'] - if len(aux) > 0: - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] + if aux: auxVerb = aux[0] - phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in neg] + + phrase_nodes = [node] + aux + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -47,7 +53,7 @@ def process_node(self, node): form='Conv', tense=auxVerb.feats['Tense'], aspect=node.feats['Aspect'], - polarity=self.get_polarity(auxVerb,neg), + polarity=self.get_polarity(phrase_nodes), ords=phrase_ords, gender=auxVerb.feats['Gender'], animacy=auxVerb.feats['Animacy'], @@ -58,13 +64,16 @@ def process_node(self, node): else: cop = [x for x in node.children if x.udeprel == 'cop' and x.feats['VerbForm'] == 'Conv'] - if len(cop) > 0: + if cop: prep = [x for x in node.children if x.upos == 'ADP'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] copVerb = cop[0] - phrase_ords = [node.ord] + [x.ord for x in cop] + [x.ord for x in prep] + [x.ord for x in neg] + [x.ord for x in refl] + + phrase_nodes = [node] + cop + prep + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() @@ -76,7 +85,7 @@ def process_node(self, node): 
gender=copVerb.feats['Gender'], animacy=copVerb.feats['Animacy'], form='Conv', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), ords=phrase_ords, voice=self.get_voice(copVerb, refl) ) diff --git a/udapi/block/msf/slavic/future.py b/udapi/block/msf/slavic/future.py index 2d48f86c..02452c36 100644 --- a/udapi/block/msf/slavic/future.py +++ b/udapi/block/msf/slavic/future.py @@ -11,29 +11,29 @@ class Future(udapi.block.msf.phrase.Phrase): def process_node(self, node): # future tense for Serbian and Croatian aux = [x for x in node.children if x.udeprel == 'aux' and x.feats['Tense'] == 'Pres' and (x.lemma == 'hteti' or x.lemma == 'htjeti')] - if node.upos != 'AUX' and len(aux) != 0: + if node.upos != 'AUX' and aux: refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] aux_other = [x for x in node.children if x.udeprel == 'aux'] # adding aux for passive voice cop = [x for x in node.children if x.deprel == 'cop'] - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] + [x.ord for x in aux_other] + [x.ord for x in cop] + + phrase_nodes = [node] + refl + aux_other + cop + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() - # u infinitivu neni vyznacen slovesny rod - # PhraseVoice ale chceme nastavit na activum, jelikoz se jedna o pomocne sloveso + infinitiv - voice=node.feats['Voice'] - #if voice == '': - # voice = 'Act' - if len(cop) == 0: + + if not cop: self.write_node_info(node, tense='Fut', person=aux[0].feats['Person'], number=aux[0].feats['Number'], mood='Ind', - voice=voice, + voice=node.feats['Voice'], aspect=node.feats['Aspect'], # srbstina ani chorvatstina vidy nema form='Fin', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), 
gender=node.feats['Gender'], animacy=node.feats['Animacy'], @@ -41,6 +41,7 @@ def process_node(self, node): ) else: prep = [x for x in node.children if x.upos == 'ADP'] + phrase_nodes += prep phrase_ords += [x.ord for x in prep] phrase_ords.sort() @@ -49,10 +50,10 @@ def process_node(self, node): person=aux[0].feats['Person'], number=aux[0].feats['Number'], mood='Ind', - voice=voice, + voice=node.feats['Voice'], aspect=node.feats['Aspect'], form='Fin', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), gender=node.feats['Gender'], animacy=node.feats['Animacy'], @@ -65,10 +66,14 @@ def process_node(self, node): # Bulgarian forms the future tense with the auxiliary word ще and a verb in the present tense aux = [x for x in node.children if x.lemma == 'ќе' or x.lemma == 'ще'] - if node.feats['Tense'] == 'Pres' and len(aux) > 0: + if node.feats['Tense'] == 'Pres' and aux: refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] + [x.ord for x in aux] + + phrase_nodes = [node] + refl + aux + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -79,7 +84,7 @@ def process_node(self, node): voice=node.feats['Voice'], aspect=node.feats['Aspect'], form='Fin', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), ords=phrase_ords ) @@ -91,9 +96,11 @@ def process_node(self, node): """if node.feats['Aspect'] == 'Perf' and (node.feats['Tense'] == 'Pres' or node.feats['Tense'] == 'Fut') and node.feats['VerbForm'] != 'Conv': refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children 
if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] + phrase_nodes = [node] + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -104,7 +111,7 @@ def process_node(self, node): voice=self.get_voice(node,refl), form='Fin', aspect='Perf', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), ords=phrase_ords ) @@ -119,11 +126,16 @@ def process_node(self, node): aux = [x for x in node.children if x.udeprel == 'aux' and x.feats['Tense'] == 'Fut'] refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] - if len(aux) > 0: + + phrase_nodes = [node] + aux + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] + phrase_ords.sort() + + if aux: auxVerb = aux[0] - phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in refl] + [x.ord for x in neg] - phrase_ords.sort() self.write_node_info(node, tense='Fut', person=auxVerb.feats['Person'], @@ -132,7 +144,7 @@ def process_node(self, node): voice=self.get_voice(node,refl), aspect=node.feats['Aspect'], form='Fin', - polarity=self.get_polarity(auxVerb,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), ords=phrase_ords, gender=node.feats['Gender'], @@ -143,20 +155,17 @@ def process_node(self, node): # simple future tense - e.g. in Serbian, the future tense can be formed by combining a verb with a full meaning and an auxiliary verb into one word, i.e. without an auxiliary verb # or verbs like pojede, půjdeme... 
in Czech - if len(aux) == 0 and node.feats['Tense'] == 'Fut': - - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] - phrase_ords.sort() - + if not aux and node.feats['Tense'] == 'Fut': + self.write_node_info(node, tense='Fut', person=node.feats['Person'], number=node.feats['Number'], mood='Ind', - voice=self.get_voice(node,refl), # passivum se muze objevit (napr. pojede se), ale jmenny rod neni vyjadren + voice=self.get_voice(node,refl), aspect=node.feats['Aspect'], form='Fin', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), ords=phrase_ords ) @@ -164,14 +173,17 @@ def process_node(self, node): cop = [x for x in node.children if x.udeprel == 'cop' and x.feats['Tense'] == 'Fut'] - if len(cop) > 0: + if cop: copVerb = cop[0] aux = [x for x in node.children if x.udeprel == 'aux' and x.feats['Mood']=='Ind'] prep = [x for x in node.children if x.upos == 'ADP'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - phrase_ords = [node.ord] + [x.ord for x in cop] + [x.ord for x in aux] + [x.ord for x in prep] + [x.ord for x in neg] + [x.ord for x in refl] + phrase_nodes = [node] + cop + aux + prep + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -182,7 +194,7 @@ def process_node(self, node): mood='Ind', form='Fin', voice=self.get_voice(copVerb, refl), - polarity=self.get_polarity(copVerb,neg), + polarity=self.get_polarity(phrase_nodes), ords=phrase_ords ) diff --git a/udapi/block/msf/slavic/imperative.py b/udapi/block/msf/slavic/imperative.py index 209406e9..d4fedd50 100644 --- a/udapi/block/msf/slavic/imperative.py +++ b/udapi/block/msf/slavic/imperative.py @@ -12,9 +12,12 @@ def process_node(self, node): # the condition node.upos 
== 'VERB' ensures that copulas do not enter this branch if node.feats['Mood'] == 'Imp' and node.upos == 'VERB': refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] + phrase_nodes = [node] + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -24,7 +27,7 @@ def process_node(self, node): mood='Imp', form='Fin', voice='Act', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), ords=phrase_ords ) @@ -33,10 +36,12 @@ def process_node(self, node): # verbs in the passive forms are marked as ADJ if node.upos == 'ADJ' and node.feats['Voice'] == 'Pass': aux = [x for x in node.children if x.udeprel == 'aux' and x.feats['Mood'] == 'Imp'] - if len(aux) > 0: - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] + if aux: + phrase_nodes = [node] + aux + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg - phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in neg] + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -46,7 +51,7 @@ def process_node(self, node): voice='Pass', aspect=node.feats['Aspect'], form='Fin', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), ords=phrase_ords, gender=node.feats['Gender'], animacy=node.feats['Animacy'] @@ -55,13 +60,17 @@ def process_node(self, node): cop = [x for x in node.children if x.udeprel == 'cop' and x.feats['Mood'] == 'Imp'] - if len(cop) > 0: + if cop: prep = [x for x in node.children if x.upos == 'ADP'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] refl = [x for x in 
node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] copVerb = cop[0] - phrase_ords = [node.ord] + [x.ord for x in cop] + [x.ord for x in prep] + [x.ord for x in neg] + [x.ord for x in refl] + + phrase_nodes = [node] + cop + prep + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -72,6 +81,6 @@ def process_node(self, node): form='Fin', voice=self.get_voice(copVerb, refl), reflex=self.get_is_reflex(node, refl), - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), ords=phrase_ords ) diff --git a/udapi/block/msf/slavic/infinitive.py b/udapi/block/msf/slavic/infinitive.py index 00c1bcf6..f39a2646 100644 --- a/udapi/block/msf/slavic/infinitive.py +++ b/udapi/block/msf/slavic/infinitive.py @@ -11,22 +11,22 @@ class Infinitive(udapi.block.msf.phrase.Phrase): def process_node(self,node): if node.feats['VerbForm'] == 'Inf' and node.upos == 'VERB': aux = [x for x in node.children if x.udeprel == 'aux'] - if len(aux) == 0: # the list of auxiliary list must be empty - we don't want to mark infinitives which are part of any other phrase (for example the infinititive is part of the future tense in Czech) + if not aux: # the list of auxiliaries must be empty - we don't want to mark infinitives which are part of any other phrase (for example the infinitive is part of the future tense in Czech) refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] - phrase_ords.sort() + phrase_nodes = [node] + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg - voice='Act' - if self.is_expl_pass(refl): - voice='Pass' + phrase_ords = [x.ord for x in phrase_nodes] + phrase_ords.sort() + 
self.write_node_info(node, aspect=node.feats['Aspect'], - voice=voice, + voice=self.get_voice(node,refl), form='Inf', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), ords=phrase_ords ) @@ -35,18 +35,21 @@ def process_node(self,node): if node.upos == 'ADJ' and node.feats['Voice'] == 'Pass': aux = [x for x in node.children if x.udeprel == 'aux' and x.feats['VerbForm'] == 'Inf'] aux_forb = [x for x in node.children if x.udeprel == 'aux' and x.feats['VerbForm'] != 'Inf'] - if len(aux) > 0 and len(aux_forb) == 0: - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] + if aux and not aux_forb: refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in neg] + [x.ord for x in refl] + phrase_nodes = [node] + aux + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, aspect=node.feats['Aspect'], voice='Pass', form='Inf', - polarity=self.get_polarity(aux[0],neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node, refl), ords=phrase_ords, gender=node.feats['Gender'], @@ -59,18 +62,22 @@ def process_node(self,node): cop = [x for x in node.children if x.udeprel == 'cop' and x.feats['VerbForm'] == 'Inf'] aux_forb = [x for x in node.children if x.udeprel == 'aux'] - if len(cop) > 0 and len(aux_forb) == 0: + if cop and not aux_forb: prep = [x for x in node.children if x.upos == 'ADP'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - phrase_ords = [node.ord] + [x.ord for x in cop] + [x.ord for x in prep] + [x.ord for x in neg] + [x.ord for x in refl] + + phrase_nodes = [node] + cop + prep + refl + neg = 
self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, aspect=cop[0].feats['Aspect'], voice=self.get_voice(cop[0], refl), form='Inf', - polarity=self.get_polarity(cop[0],neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node, refl), ords=phrase_ords ) @@ -78,16 +85,19 @@ def process_node(self,node): # there is a rare verb form called supine in Slovenian, it is used instead of infinitive as the argument of motion verbs if node.feats['VerbForm'] == 'Sup': refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] + + phrase_nodes = [node] + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, aspect=node.feats['Aspect'], voice='Act', form='Sup', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node, refl), ords=phrase_ords ) diff --git a/udapi/block/msf/slavic/past.py b/udapi/block/msf/slavic/past.py index d4f3c7cd..423bff45 100644 --- a/udapi/block/msf/slavic/past.py +++ b/udapi/block/msf/slavic/past.py @@ -29,9 +29,12 @@ def process_node(self, node): # in Polish, verbs with Person=0 have also Tense=Past, in Ukrainian the tense is not specified if node.feats['Person'] == '0': refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] + + phrase_nodes = [node] + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] + phrase_ords = [x.ord for x in phrase_nodes] 
phrase_ords.sort() self.write_node_info(node, @@ -42,7 +45,7 @@ def process_node(self, node): voice='Act', #In Polish, impersonal statements are annotated with Voice=Act. In Ukrainian, the Voice feature is missing; therefore, we decided to annotate these phrases with PhraseVoice=Act aspect=node.feats['Aspect'], form=node.feats['VerbForm'], - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), ords=phrase_ords, gender=node.feats['Gender'], @@ -54,20 +57,23 @@ def process_node(self, node): aux = [x for x in node.children if x.udeprel == 'aux' and x.feats['Tense'] == 'Pres'] aux_pqp = [x for x in node.children if x.udeprel == 'aux' and x.feats['Tense'] in past_tenses] refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] + + phrase_nodes = [node] + aux + refl + aux_pqp + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg - phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in refl] + [x.ord for x in neg] + [x.ord for x in aux_pqp] + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() aux_cnd = [x for x in node.children if (x.feats['Mood'] == 'Cnd' or x.deprel == 'aux:cnd') and x.udeprel != 'conj'] # we don't want to mark l-participles in the conditional as past tense - if len(aux_cnd) == 0: - if len(aux) > 0: + if not aux_cnd: + if aux: person = aux[0].feats['Person'] - elif len(aux) == 0: + elif not aux: person = '3' - if len(aux_pqp) > 0: + if aux_pqp: person = aux_pqp[0].feats['Person'] # in Slovenian, the participles are not annotated as Tense='Past', the Tense feature is missing here @@ -86,7 +92,7 @@ def process_node(self, node): voice=self.get_voice(node,refl), aspect=node.feats['Aspect'], form='Fin', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), 
ords=phrase_ords, gender=node.feats['Gender'], @@ -100,13 +106,16 @@ def process_node(self, node): # the past tense is formed only by a content verb, not with an auxiliary aux_forb = [x for x in node.children if x.udeprel == 'aux'] + + if not aux_forb: - refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] + refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - if not aux_forb: + phrase_nodes = [node] + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -117,7 +126,7 @@ def process_node(self, node): voice=self.get_voice(node,refl), aspect=node.feats['Aspect'], form=node.feats['VerbForm'], - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), ords=phrase_ords, gender=node.feats['Gender'], @@ -127,14 +136,18 @@ def process_node(self, node): # passive - elif node.upos == 'ADJ' and node.feats['Voice'] == 'Pass' and len(cop) == 0: + elif node.upos == 'ADJ' and node.feats['Voice'] == 'Pass' and not cop: aux_past_tense = [x for x in node.children if x.udeprel == 'aux' and (x.feats['Tense'] in past_tenses)] aux_cnd = [x for x in node.children if x.feats['Mood'] == 'Cnd' or x.deprel == 'aux:cnd'] # we don't want to mark l-participles in the conditional as past tense - if len(aux_cnd) == 0: - if len(aux_past_tense) > 0: + if not aux_cnd: + if aux_past_tense: aux_pres_tense = [x for x in node.children if x.udeprel == 'aux' and x.feats['Tense'] == 'Pres'] # e. g. 
the auxiliary 'jsem' in the phrase 'byl jsem přinucen' - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] - phrase_ords = [node.ord] + [x.ord for x in aux_past_tense] + [x.ord for x in aux_pres_tense] + [x.ord for x in neg] + + phrase_nodes = [node] + aux_past_tense + aux_pres_tense + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() person = '3' @@ -150,7 +163,7 @@ def process_node(self, node): voice='Pass', form='Fin', aspect=node.feats['Aspect'], - polarity=self.get_polarity(aux_past_tense[0],neg), + polarity=self.get_polarity(phrase_nodes), ords=phrase_ords, gender=node.feats['Gender'], animacy=node.feats['Animacy'] @@ -158,13 +171,16 @@ def process_node(self, node): else: aux_cnd = [x for x in node.children if x.feats['Mood'] == 'Cnd' or x.deprel == 'aux:cnd'] # we don't want to mark l-participles in the conditional as past tense - if len(cop) > 0 and len(aux_cnd) == 0: + if cop and not aux_cnd: aux_past_tense = [x for x in node.children if x.udeprel == 'aux' and x.feats['Tense'] == 'Pres'] prep = [x for x in node.children if x.upos == 'ADP'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - phrase_ords = [node.ord] + [x.ord for x in aux_past_tense] + [x.ord for x in cop] + [x.ord for x in prep] + [x.ord for x in neg] + [x.ord for x in refl] + phrase_nodes = [node] + aux_past_tense + cop + prep + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() person = '3' @@ -184,7 +200,7 @@ def process_node(self, node): voice=self.get_voice(cop[0], refl), form='Fin', reflex=self.get_is_reflex(node,refl), - polarity=self.get_polarity(cop[0],neg), + polarity=self.get_polarity(phrase_nodes), ords=phrase_ords, 
gender=cop[0].feats['Gender'], animacy=cop[0].feats['Animacy'] diff --git a/udapi/block/msf/slavic/preprocessor.py b/udapi/block/msf/slavic/preprocessor.py index e9a5e90b..804a081f 100644 --- a/udapi/block/msf/slavic/preprocessor.py +++ b/udapi/block/msf/slavic/preprocessor.py @@ -71,9 +71,13 @@ def process_node(self,node): if node.feats['Mood'] == 'Sub': node.feats['Mood'] = 'Cnd' - # # although infinitives in Old Church Slavonic are annotated with Tense=Pres, they do not convey tense; therefore, we remove this annotation + # although infinitives in Old Church Slavonic are annotated with Tense=Pres, they do not convey tense; therefore, we remove this annotation if node.feats['VerbForm'] == 'Inf': node.feats['Tense'] = '' + # in the Russian SynTagRus corpus, the negative particles have no Polarity=Neg feature + if node.lemma == 'не' and node.upos == 'PART' and node.udeprel == 'advmod': + node.feats['Polarity'] = 'Neg' + # TODO maybe we want to set Tense=Fut for the perfective verbs with Tense=Pres? 
This could solve the problem with the simplified detection of the future tense in Czech # but there are many verbs with no Aspect value, so the problem is still there diff --git a/udapi/block/msf/slavic/present.py b/udapi/block/msf/slavic/present.py index e579c122..9a743a9e 100644 --- a/udapi/block/msf/slavic/present.py +++ b/udapi/block/msf/slavic/present.py @@ -12,12 +12,17 @@ def process_node(self,node): # the condition VerbForm == 'Fin' ensures that there are no transgressives between the found verbs # the aspect is not always given in Czech treebanks, so we can't rely on the fact that the imperfect aspect is specified if node.feats['Tense'] == 'Pres' and node.upos == 'VERB' and node.feats['VerbForm'] == 'Fin': #and node.feats['Aspect']=='Imp': - refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] + aux_forb = [x for x in node.children if x.upos == 'AUX' and (x.lemma == 'ќе' or x.lemma == 'ще' or x.feats['Mood'] == 'Cnd')] # forbidden auxiliaries for present tense (these auxiliaries are used for the future tense or the conditional mood) - if len(aux_forb) == 0: - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] + if not aux_forb: + refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] + + phrase_nodes = [node] + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -28,7 +33,7 @@ def process_node(self,node): aspect=node.feats['Aspect'], voice=self.get_voice(node,refl), form='Fin', - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), reflex=self.get_is_reflex(node,refl), ords=phrase_ords ) @@ -38,12 +43,15 @@ def process_node(self,node): if node.upos == 'ADJ' and node.feats['Voice'] == 'Pass': aux = [x for x in node.children if 
x.udeprel == 'aux' and x.feats['Tense'] == 'Pres' and x.lemma != 'hteti' and x.lemma != 'htjeti'] aux_forb = [x for x in node.children if x.udeprel == 'aux' and x.feats['Tense'] != 'Pres'] # we don't want the past passive (e. g. 'byl jsem poučen' in Czech) - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] - - phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in neg] - phrase_ords.sort() - if len(aux) > 0 and len(aux_forb) == 0: + if aux and not aux_forb: + phrase_nodes = [node] + aux + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] + phrase_ords.sort() + auxVerb = aux[0] self.write_node_info(node, @@ -54,7 +62,7 @@ def process_node(self,node): aspect=node.feats['Aspect'], form='Fin', voice='Pass', - polarity=self.get_polarity(auxVerb,neg), + polarity=self.get_polarity(phrase_nodes), ords=phrase_ords, gender=node.feats['Gender'], animacy=node.feats['Animacy'] @@ -69,8 +77,12 @@ def process_node(self,node): if not aux_forb and not cop: refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] - phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg] + + phrase_nodes = [node] + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -80,22 +92,26 @@ def process_node(self,node): form='Part', voice=self.get_voice(node, refl), reflex=self.get_is_reflex(node, refl), - polarity=self.get_polarity(node,neg), + polarity=self.get_polarity(phrase_nodes), ords=phrase_ords ) return cop = [x for x in node.children if x.udeprel == 'cop' and x.feats['Tense'] == 'Pres'] - aux = [x for x in node.children if x.udeprel == "aux" and x.feats['Mood'] == 'Ind' and x.feats['Tense'] == 'Pres'] aux_forb = [x for x in 
node.children if x.upos == 'AUX' and x.feats['Tense'] != 'Pres'] # in Serbian this can be a future tense - prep = [x for x in node.children if x.upos == 'ADP'] - neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART'] - refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] - - if len(cop) > 0 and len(aux_forb) == 0: + + if cop and not aux_forb: + aux = [x for x in node.children if x.udeprel == "aux" and x.feats['Mood'] == 'Ind' and x.feats['Tense'] == 'Pres'] + prep = [x for x in node.children if x.upos == 'ADP'] + refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl'] + + phrase_nodes = [node] + cop + aux + prep + refl + neg = self.get_negative_particles(phrase_nodes) + phrase_nodes += neg + copVerb = cop[0] - phrase_ords = [node.ord] + [x.ord for x in cop] + [x.ord for x in aux] + [x.ord for x in prep] + [x.ord for x in neg] + [x.ord for x in refl] + phrase_ords = [x.ord for x in phrase_nodes] phrase_ords.sort() self.write_node_info(node, @@ -107,6 +123,6 @@ def process_node(self,node): form='Fin', voice=self.get_voice(copVerb, refl), reflex=self.get_is_reflex(node, refl), - polarity=self.get_polarity(copVerb,neg), + polarity=self.get_polarity(phrase_nodes), ords=phrase_ords )