From 98c1380cf5ae58b211aa8cac92c88e9a1cbcbdde Mon Sep 17 00:00:00 2001 From: Victor Felder Date: Sun, 22 Jan 2017 15:53:38 +0100 Subject: [PATCH] test --- zds/forum/models.py | 6 ++++-- zds/searchv2/models.py | 24 ++++++++++++++---------- zds/tutorialv2/models/models_database.py | 6 +++++- zds/utils/templatetags/elasticsearch.py | 4 ++-- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/zds/forum/models.py b/zds/forum/models.py index a8721468af..df13da64dd 100644 --- a/zds/forum/models.py +++ b/zds/forum/models.py @@ -170,6 +170,7 @@ class Topic(AbstractESDjangoIndexable): - Locked: none can write on a locked topic. - Sticky: sticky topics are displayed on top of topic lists (ex: on forum page). """ + objects_per_batch = 1000 class Meta: verbose_name = 'Sujet' @@ -459,6 +460,7 @@ class Post(Comment, AbstractESDjangoIndexable): A post can be marked as useful: topic's author (or admin) can declare any topic as "useful", and this post is displayed as is on front. """ + objects_per_batch = 1000 topic = models.ForeignKey(Topic, verbose_name='Sujet', db_index=True) @@ -509,8 +511,8 @@ def get_es_django_indexable(cls, force_reindexing=False): """ q = super(Post, cls).get_es_django_indexable(force_reindexing)\ - .select_related('topic')\ - .select_related('topic__forum') + .prefetch_related('topic')\ + .prefetch_related('topic__forum') return q diff --git a/zds/searchv2/models.py b/zds/searchv2/models.py index 70c69e7b62..d2e7ec6a55 100644 --- a/zds/searchv2/models.py +++ b/zds/searchv2/models.py @@ -131,7 +131,7 @@ def get_es_document_as_bulk_action(self, index, action='index'): } if action == 'index': - if self.es_id != '': + if self.es_id: document['_id'] = self.es_id document['_source'] = self.get_es_document_source() elif action == 'update': @@ -208,15 +208,17 @@ def get_es_indexable(cls, force_reindexing=False, objects_per_batch=100): query = cls.get_es_django_indexable(force_reindexing).order_by('pk') current_pk = 0 + count = 0 while True: objects = query.filter(pk__gt=current_pk).all()[:objects_per_batch] + count = count + objects_per_batch if not objects: break - for obj in objects: - current_pk = obj.pk + current_pk = objects[-1].pk + print "{} so far, will continue at pk={}".format(count, current_pk) yield objects @@ -261,7 +263,7 @@ def get_django_indexable_objects(): class ESIndexManager(object): """Manage a given index with different taylor-made functions""" - def __init__(self, name, shards=5, replicas=0, objects_per_batch=100, connection_alias='default'): + def __init__(self, name, shards=5, replicas=0, connection_alias='default'): """Create a manager for a given index :param name: the index name @@ -270,8 +272,6 @@ def __init__(self, name, shards=5, replicas=0, objects_per_batch=100, connection :type shards: int :param replicas: number of replicas :type replicas: int - :param objects_per_batch: limit the number of objects at one time - :type objects_per_batch: int :param connection_alias: the alias for connection :type connection_alias: str """ @@ -280,7 +280,6 @@ def __init__(self, name, shards=5, replicas=0, objects_per_batch=100, connection self.number_of_shards = shards self.number_of_replicas = replicas - self.objects_per_batch = 100 self.logger = logging.getLogger('ESIndexManager:{}'.format(self.index)) @@ -452,7 +451,7 @@ def clear_indexing_of_model(self, model): @atomic def es_bulk_indexing_of_model(self, model, force_reindexing=False): """Perform a bulk action on documents of a given model. - Documents are batched according to ``self.objects_per_batch`` (``chunk_size`` is set accordingly). + Documents are batched according to ``model.objects_per_batch`` (``chunk_size`` is set accordingly). See http://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.Elasticsearch.bulk and http://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.streaming_bulk @@ -469,14 +468,19 @@ def es_bulk_indexing_of_model(self, model, force_reindexing=False): if not self.connected_to_es: return - for objects in model.get_es_indexable(force_reindexing, self.objects_per_batch): + for objects in model.get_es_indexable(force_reindexing, model.objects_per_batch): def yield_formatted_documents(): for obj in objects: yield obj.get_es_document_as_bulk_action( self.index, 'update' if obj.es_already_indexed and not force_reindexing else 'index') - for _, hit in streaming_bulk(self.es, yield_formatted_documents(), chunk_size=self.objects_per_batch): + for _, hit in streaming_bulk( + self.es, + yield_formatted_documents(), + chunk_size=model.objects_per_batch, + timeout=30 + ): action = hit.keys()[0] self.logger.info('{} {} with id {}'.format(action, hit[action]['_type'], hit[action]['_id'])) diff --git a/zds/tutorialv2/models/models_database.py b/zds/tutorialv2/models/models_database.py index cd3a7f05df..bd87e2a7d1 100644 --- a/zds/tutorialv2/models/models_database.py +++ b/zds/tutorialv2/models/models_database.py @@ -561,6 +561,7 @@ class PublishedContent(AbstractESDjangoIndexable): Linked to a ``PublishableContent`` for the rest. Don't forget to add a ``.prefetch_related('content')`` !! """ + objects_per_batch = 250 class Meta: verbose_name = 'Contenu publiƩ' @@ -900,7 +901,10 @@ def get_es_indexable(cls, force_reindexing=False, objects_per_batch=100): index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX) - for contents in super(PublishedContent, cls).get_es_indexable(force_reindexing, objects_per_batch=100): + for contents in super(PublishedContent, cls).get_es_indexable( + force_reindexing, + objects_per_batch=cls.objects_per_batch + ): chapters = [] for content in contents: diff --git a/zds/utils/templatetags/elasticsearch.py b/zds/utils/templatetags/elasticsearch.py index 54e0f56cc7..6a231a70a0 100644 --- a/zds/utils/templatetags/elasticsearch.py +++ b/zds/utils/templatetags/elasticsearch.py @@ -18,7 +18,7 @@ def format_highlight(highlighted_fragments): fragments = [] for fragment in highlighted_fragments: - if fragment != '': + if fragment: fragments.append( html_tag.sub('', fragment).replace('[hl]', '').replace('[/hl]', '')) @@ -51,7 +51,7 @@ def render(self, context): text = '' - if search_result[field] != '': + if search_result[field]: text = html_tag.sub('', search_result[field]) if 'highlight' in search_result.meta: