diff --git a/itn/chinese/rules/fraction.py b/itn/chinese/rules/fraction.py index f3c8117b..f61a1cdf 100644 --- a/itn/chinese/rules/fraction.py +++ b/itn/chinese/rules/fraction.py @@ -15,7 +15,8 @@ from itn.chinese.rules.cardinal import Cardinal from tn.processor import Processor -from pynini.lib.pynutil import delete, insert +from pynini import string_file +from pynini.lib.pynutil import delete, insert, add_weight class Fraction(Processor): @@ -27,14 +28,21 @@ def __init__(self): def build_tagger(self): number = Cardinal().number - - tagger = (insert('denominator: "') + number + + sign = string_file('itn/chinese/data/number/sign.tsv') # + - + + # NOTE(xcsong): default weight = 1.0, set to -1.0 means higher priority + # For example, + # 1.0, 负二分之三 -> { sign: "" denominator: "-2" numerator: "3" } + # -1.0,负二分之三 -> { sign: "-" denominator: "2" numerator: "3" } + tagger = (insert('sign: "') + add_weight(sign, -1.0).ques + + insert('" denominator: "') + number + delete('分之') + insert('" numerator: "') + number + insert('"')) self.tagger = self.add_tokens(tagger) def build_verbalizer(self): - numerator = delete('numerator: "') + self.SIGMA + delete('"') + sign = delete('sign: "') + self.SIGMA + delete('"') + numerator = delete(' numerator: "') + self.SIGMA + delete('"') denominator = delete(' denominator: "') + self.SIGMA + delete('"') - verbalizer = numerator + insert('/') + denominator + verbalizer = sign + numerator + insert('/') + denominator self.verbalizer = self.delete_tokens(verbalizer) diff --git a/runtime/processor/token_parser.cc b/runtime/processor/token_parser.cc index 4e640bdb..21a20ccc 100644 --- a/runtime/processor/token_parser.cc +++ b/runtime/processor/token_parser.cc @@ -34,7 +34,7 @@ const std::unordered_map> TN_ORDERS = { {"time", {"noon", "hour", "minute", "second"}}}; const std::unordered_map> ITN_ORDERS = { {"date", {"year", "month", "day"}}, - {"fraction", {"numerator", "denominator"}}, + {"fraction", {"sign", "numerator", "denominator"}}, {"measure", {"numerator", "denominator", "value"}}, {"money", {"currency", "value"}}, {"time", {"hour", "minute", "second", "noon"}}}; diff --git a/tn/token_parser.py b/tn/token_parser.py index 5db6b606..ef32222f 100644 --- a/tn/token_parser.py +++ b/tn/token_parser.py @@ -23,7 +23,7 @@ 'time': ['noon', 'hour', 'minute', 'second']} ITN_ORDERS = { 'date': ['year', 'month', 'day'], - 'fraction': ['numerator', 'denominator'], + 'fraction': ['sign', 'numerator', 'denominator'], 'measure': ['numerator', 'denominator', 'value'], 'money': ['currency', 'value'], 'time': ['hour', 'minute', 'second', 'noon']}