Skip to content

Commit

Permalink
Make more robust usage of affix conditions, fix edge case of Ukrainian dictionaries
Browse files Browse the repository at this point in the history
  • Loading branch information
zverok committed Jan 23, 2022
1 parent bf93941 commit f92f74b
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 28 deletions.
13 changes: 9 additions & 4 deletions spylls/hunspell/algo/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,12 +566,11 @@ def good_suffix(suffix):
all(f not in suffix.flags for f in forbidden_flags)
)

# We are selecting suffixes that have flags and settings, and their regexp pattern match
# the provided word.
# We are selecting suffixes that have flags and settings.
possible_suffixes = (
suffix
for suffix in self.aff.suffixes_index.lookup(word[::-1])
if good_suffix(suffix) and suffix.lookup_regexp.search(word)
if good_suffix(suffix)
)

# With all of those suffixes, we are producing AffixForms of the word passed
Expand All @@ -580,6 +579,9 @@ def good_suffix(suffix):
# stem (named ``strip``). For example, suffix might be declared as ``(strip=y, add=ier)``,
# then to restore the original stem from word "prettier" we must remove "ier" and add back "y"
stem = suffix.replace_regexp.sub(suffix.strip, word)
# Even with matching flags, the suffix's condition still might prohibit this form
if not suffix.cond_regexp.search(stem):
continue

yield AffixForm(word, stem, suffix=suffix)

Expand Down Expand Up @@ -609,12 +611,15 @@ def good_prefix(prefix):
possible_prefixes = (
prefix
for prefix in self.aff.prefixes_index.lookup(word)
if good_prefix(prefix) and prefix.lookup_regexp.search(word)
if good_prefix(prefix)
)

for prefix in possible_prefixes:
stem = prefix.replace_regexp.sub(prefix.strip, word)

if not prefix.cond_regexp.search(stem):
continue

yield AffixForm(word, stem, prefix=prefix)

# Second prefix is tried *only* when there is the setting ``COMPLEXPREFIXES`` in
Expand Down
25 changes: 1 addition & 24 deletions spylls/hunspell/data/aff.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,18 +233,7 @@ class Prefix(Affix):

def __post_init__(self):
# "-" does NOT have a special regex-meaning, while might happen as a regular word char (for ex., hu_HU)
condition = self.condition.replace('-', '\\-')
self.cond_regexp = re.compile('^' + condition)

cond_parts = re.findall(r'(\[.+\]|[^\[])', condition)
cond_parts = cond_parts[len(self.strip):]

if cond_parts and cond_parts != ['.']:
cond = '(?=' + ''.join(cond_parts) + ')'
else:
cond = ''

self.lookup_regexp = re.compile('^' + self.add + cond)
self.cond_regexp = re.compile('^' + self.condition.replace('-', '\\-'))
self.replace_regexp = re.compile('^' + self.add)

def __repr__(self):
Expand All @@ -264,18 +253,6 @@ class Suffix(Affix):
def __post_init__(self):
# "-" does NOT have a special regex-meaning, while might happen as a regular word char (for ex., hu_HU)
self.cond_regexp = re.compile(self.condition.replace('-', '\\-') + '$')

cond_parts = re.findall(r'(\[.+\]|[^\[])', self.condition)
if self.strip:
cond_parts = cond_parts[:-len(self.strip)]

if cond_parts and cond_parts != ['.']:
cond = '(' + ''.join(cond_parts) + ')'
else:
cond = ''

cond = cond.replace('-', '\\-')
self.lookup_regexp = re.compile(cond + self.add + '$')
self.replace_regexp = re.compile(self.add + '$')

def __repr__(self):
Expand Down

0 comments on commit f92f74b

Please sign in to comment.