From 98c1380cf5ae58b211aa8cac92c88e9a1cbcbdde Mon Sep 17 00:00:00 2001
From: Victor Felder <victor@draft.li>
Date: Sun, 22 Jan 2017 15:53:38 +0100
Subject: [PATCH] Set Elasticsearch indexing batch size per model

---
 zds/forum/models.py                      |  6 ++++--
 zds/searchv2/models.py                   | 24 ++++++++++++++----------
 zds/tutorialv2/models/models_database.py |  6 +++++-
 zds/utils/templatetags/elasticsearch.py  |  4 ++--
 4 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/zds/forum/models.py b/zds/forum/models.py
index a8721468af..df13da64dd 100644
--- a/zds/forum/models.py
+++ b/zds/forum/models.py
@@ -170,6 +170,7 @@ class Topic(AbstractESDjangoIndexable):
     - Locked: none can write on a locked topic.
     - Sticky: sticky topics are displayed on top of topic lists (ex: on forum page).
     """
+    objects_per_batch = 1000
 
     class Meta:
         verbose_name = 'Sujet'
@@ -459,6 +460,7 @@ class Post(Comment, AbstractESDjangoIndexable):
     A post can be marked as useful: topic's author (or admin) can declare any topic as "useful", and this post is
     displayed as is on front.
     """
+    objects_per_batch = 1000
 
     topic = models.ForeignKey(Topic, verbose_name='Sujet', db_index=True)
 
@@ -509,8 +511,8 @@ def get_es_django_indexable(cls, force_reindexing=False):
         """
 
         q = super(Post, cls).get_es_django_indexable(force_reindexing)\
-            .select_related('topic')\
-            .select_related('topic__forum')
+            .prefetch_related('topic')\
+            .prefetch_related('topic__forum')
 
         return q
 
diff --git a/zds/searchv2/models.py b/zds/searchv2/models.py
index 70c69e7b62..d2e7ec6a55 100644
--- a/zds/searchv2/models.py
+++ b/zds/searchv2/models.py
@@ -131,7 +131,7 @@ def get_es_document_as_bulk_action(self, index, action='index'):
         }
 
         if action == 'index':
-            if self.es_id != '':
+            if self.es_id:
                 document['_id'] = self.es_id
             document['_source'] = self.get_es_document_source()
         elif action == 'update':
@@ -208,15 +208,17 @@ def get_es_indexable(cls, force_reindexing=False, objects_per_batch=100):
 
         query = cls.get_es_django_indexable(force_reindexing).order_by('pk')
         current_pk = 0
+        count = 0
 
         while True:
             objects = query.filter(pk__gt=current_pk).all()[:objects_per_batch]
+            count += len(objects)  # count the objects actually fetched, not the nominal batch size
 
             if not objects:
                 break
 
-            for obj in objects:
-                current_pk = obj.pk
+            current_pk = list(objects)[-1].pk  # Django QuerySets do not support negative indexing
+            logging.getLogger(__name__).info('{} so far, will continue at pk={}'.format(count, current_pk))
 
             yield objects
 
@@ -261,7 +263,7 @@ def get_django_indexable_objects():
 class ESIndexManager(object):
     """Manage a given index with different taylor-made functions"""
 
-    def __init__(self, name, shards=5, replicas=0, objects_per_batch=100, connection_alias='default'):
+    def __init__(self, name, shards=5, replicas=0, connection_alias='default'):
         """Create a manager for a given index
 
         :param name: the index name
@@ -270,8 +272,6 @@ def __init__(self, name, shards=5, replicas=0, objects_per_batch=100, connection
         :type shards: int
         :param replicas: number of replicas
         :type replicas: int
-        :param objects_per_batch: limit the number of objects at one time
-        :type objects_per_batch: int
         :param connection_alias: the alias for connection
         :type connection_alias: str
         """
@@ -280,7 +280,6 @@ def __init__(self, name, shards=5, replicas=0, objects_per_batch=100, connection
 
         self.number_of_shards = shards
         self.number_of_replicas = replicas
-        self.objects_per_batch = 100
 
         self.logger = logging.getLogger('ESIndexManager:{}'.format(self.index))
 
@@ -452,7 +451,7 @@ def clear_indexing_of_model(self, model):
     @atomic
     def es_bulk_indexing_of_model(self, model, force_reindexing=False):
         """Perform a bulk action on documents of a given model.
-        Documents are batched according to ``self.objects_per_batch`` (``chunk_size`` is set accordingly).
+        Documents are batched according to ``model.objects_per_batch`` (``chunk_size`` is set accordingly).
 
         See http://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.Elasticsearch.bulk
         and http://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.streaming_bulk
@@ -469,14 +468,19 @@ def es_bulk_indexing_of_model(self, model, force_reindexing=False):
         if not self.connected_to_es:
             return
 
-        for objects in model.get_es_indexable(force_reindexing, self.objects_per_batch):
+        for objects in model.get_es_indexable(force_reindexing, model.objects_per_batch):
 
             def yield_formatted_documents():
                 for obj in objects:
                     yield obj.get_es_document_as_bulk_action(
                         self.index, 'update' if obj.es_already_indexed and not force_reindexing else 'index')
 
-            for _, hit in streaming_bulk(self.es, yield_formatted_documents(), chunk_size=self.objects_per_batch):
+            for _, hit in streaming_bulk(
+                self.es,
+                yield_formatted_documents(),
+                chunk_size=model.objects_per_batch,
+                request_timeout=30  # elasticsearch-py per-request HTTP timeout, in seconds
+            ):
                 action = hit.keys()[0]
                 self.logger.info('{} {} with id {}'.format(action, hit[action]['_type'], hit[action]['_id']))
 
diff --git a/zds/tutorialv2/models/models_database.py b/zds/tutorialv2/models/models_database.py
index cd3a7f05df..bd87e2a7d1 100644
--- a/zds/tutorialv2/models/models_database.py
+++ b/zds/tutorialv2/models/models_database.py
@@ -561,6 +561,7 @@ class PublishedContent(AbstractESDjangoIndexable):
 
     Linked to a ``PublishableContent`` for the rest. Don't forget to add a ``.prefetch_related('content')`` !!
     """
+    objects_per_batch = 250
 
     class Meta:
         verbose_name = 'Contenu publié'
@@ -900,7 +901,10 @@ def get_es_indexable(cls, force_reindexing=False, objects_per_batch=100):
 
         index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
 
-        for contents in super(PublishedContent, cls).get_es_indexable(force_reindexing, objects_per_batch=100):
+        for contents in super(PublishedContent, cls).get_es_indexable(
+            force_reindexing,
+            objects_per_batch=cls.objects_per_batch
+        ):
             chapters = []
 
             for content in contents:
diff --git a/zds/utils/templatetags/elasticsearch.py b/zds/utils/templatetags/elasticsearch.py
index 54e0f56cc7..6a231a70a0 100644
--- a/zds/utils/templatetags/elasticsearch.py
+++ b/zds/utils/templatetags/elasticsearch.py
@@ -18,7 +18,7 @@ def format_highlight(highlighted_fragments):
 
     fragments = []
     for fragment in highlighted_fragments:
-        if fragment != '':
+        if fragment:
             fragments.append(
                 html_tag.sub('', fragment).replace('[hl]', '<mark class="highlighted">').replace('[/hl]', '</mark>'))
 
@@ -51,7 +51,7 @@ def render(self, context):
 
         text = ''
 
-        if search_result[field] != '':
+        if search_result[field]:
             text = html_tag.sub('', search_result[field])
 
         if 'highlight' in search_result.meta: