Skip to content

Commit

Permalink
Remove Unidecode from postgres_search. Fix #5001 (#5514)
Browse files Browse the repository at this point in the history
  • Loading branch information
rSedoy authored and gasman committed Aug 20, 2019
1 parent f9753f1 commit d6e4072
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Changelog
* Fix: `pageurl` and `slugurl` tags no longer fail when `request.site` is `None` (Samir Shah)
* Fix: Output form media on add/edit image forms with custom models (Matt Westcott)
* Fix: Layout for the clear checkbox in default FileField widget (Mikalai Radchuk)
* Fix: Remove ASCII conversion from Postgres search backend, to support stemming in non-Latin alphabets (Pavel Denisov)


2.6.1 (05.08.2019)
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ Contributors
* Jonathan Liuti
* Rahmi Pruitt
* Sanyam Khurana
* Pavel Denisov

Translators
===========
Expand Down
1 change: 1 addition & 0 deletions docs/releases/2.7.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Bug fixes
* ``pageurl`` and ``slugurl`` tags no longer fail when ``request.site`` is ``None`` (Samir Shah)
* Output form media on add/edit image forms with custom models (Matt Westcott)
* Fixes layout for the clear checkbox in default FileField widget (Mikalai Radchuk)
* Remove ASCII conversion from Postgres search backend, to support stemming in non-Latin alphabets (Pavel Denisov)


Upgrade considerations
Expand Down
9 changes: 3 additions & 6 deletions wagtail/contrib/postgres_search/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from .models import IndexEntry
from .utils import (
get_content_type_pk, get_descendants_content_types_pks, get_postgresql_connections,
get_sql_weights, get_weight, unidecode)
get_sql_weights, get_weight)

EMPTY_VECTOR = SearchVector(Value(''))

Expand Down Expand Up @@ -70,7 +70,7 @@ def prepare_value(self, value):
def prepare_field(self, obj, field):
if isinstance(field, SearchField):
yield (field, get_weight(field.boost),
unidecode(self.prepare_value(field.get_value(obj))))
self.prepare_value(field.get_value(obj)))
elif isinstance(field, RelatedFields):
sub_obj = field.get_value(obj)
if sub_obj is None:
Expand Down Expand Up @@ -227,16 +227,13 @@ def get_search_field(self, field_lookup, fields=None):
and field.field_name == field_lookup:
return self.get_search_field(sub_field_name, field.fields)

def prepare_word(self, word):
return unidecode(word)

def build_tsquery_content(self, query, group=False):
if isinstance(query, PlainText):
query_formats = []
query_params = []
for word in query.query_string.split():
query_formats.append(self.TSQUERY_WORD_FORMAT)
query_params.append(self.prepare_word(word))
query_params.append(word)
operator = self.TSQUERY_OPERATORS[query.operator]
query_format = operator.join(query_formats)
if group and len(query_formats) > 1:
Expand Down
44 changes: 44 additions & 0 deletions wagtail/contrib/postgres_search/tests/test_stemming.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import unittest

from django.conf import settings
from django.db import connection
from django.test import TestCase

from wagtail.search.backends import get_search_backend
from wagtail.tests.search import models


class TestPostgresStemming(TestCase):
    """Exercise stemming of non-Latin text in the Postgres search backend."""

    def setUp(self):
        """Fetch the Postgres backend, skipping when it is not configured.

        Raises:
            unittest.SkipTest: if no entry in ``WAGTAILSEARCH_BACKENDS``
                uses the Postgres search backend.
        """
        backend_name = "wagtail.contrib.postgres_search.backend"
        is_configured = any(
            conf['BACKEND'] == backend_name
            for conf in settings.WAGTAILSEARCH_BACKENDS.values()
        )
        if not is_configured:
            raise unittest.SkipTest("Only for %s" % backend_name)

        self.backend = get_search_backend(backend_name)

    def test_ru_stemming(self):
        """A Russian title is found by several inflected forms of one word."""
        # Switch the text search configuration of this connection to Russian
        # before anything is indexed or queried.
        with connection.cursor() as cursor:
            cursor.execute(
                "SET default_text_search_config TO 'pg_catalog.russian'"
            )

        ru_book = models.Book.objects.create(
            title="Голубое сало",
            publication_date="1999-05-01",
            number_of_pages=352,
        )
        self.backend.add(ru_book)

        # The first query is the exact word from the title; the other two are
        # different forms of it and are expected to match the same book.
        for word_form in ("Голубое", "Голубая", "Голубой"):
            found = list(self.backend.search(word_form, models.Book))
            self.assertEqual(found, [ru_book])

        ru_book.delete()
7 changes: 0 additions & 7 deletions wagtail/contrib/postgres_search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,6 @@

from wagtail.search.index import Indexed, RelatedFields, SearchField

try:
# Only use the GPLv2 licensed unidecode if it's installed.
from unidecode import unidecode
except ImportError:
def unidecode(value):
return value


def get_postgresql_connections():
return [connection for connection in connections.all()
Expand Down

0 comments on commit d6e4072

Please sign in to comment.