Skip to content

Commit

Permalink
Revert "Upgrading Elasticsearch (bug 1013874)"
Browse files Browse the repository at this point in the history
This reverts commit a403c60.
  • Loading branch information
yohanboniface committed Sep 8, 2014
1 parent d40c31a commit 134c16c
Show file tree
Hide file tree
Showing 39 changed files with 905 additions and 548 deletions.
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ python:
services:
- memcached
before_install:
- wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.3.2.deb && sudo dpkg -i elasticsearch-1.3.2.deb
- sudo /usr/share/elasticsearch/bin/elasticsearch -d -D es.path.data=/tmp -D es.gateway.type=none -D es.index.store.type=memory -D es.discovery.zen.ping.multicast.enabled=false
- wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.13.deb && sudo dpkg -i elasticsearch-0.90.13.deb
- sudo /usr/share/elasticsearch/bin/plugin -install elasticsearch/elasticsearch-analysis-icu/1.13.0
- sudo /usr/share/elasticsearch/bin/elasticsearch -Des.config=scripts/elasticsearch/elasticsearch.yml
install:
- make update_deps
- pip install --no-deps -r requirements/test.txt --find-links https://pyrepo.addons.mozilla.org/
Expand Down
5 changes: 4 additions & 1 deletion apps/addons/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import amo
from amo.decorators import write
from amo.utils import chunked, walkfiles
from addons import search
from addons.models import Addon, AppSupport, FrozenAddon, Persona
from files.models import File
from lib.es.utils import raise_if_reindex_in_progress
Expand Down Expand Up @@ -473,8 +474,10 @@ def give_personas_versions():


@cronjobs.register
def reindex_addons(index=None, addon_type=None):
def reindex_addons(index=None, aliased=True, addon_type=None):
from . import tasks
# Make sure our mapping is up to date.
search.setup_mapping(index, aliased)
ids = (Addon.objects.values_list('id', flat=True)
.filter(_current_version__isnull=False,
status__in=amo.VALID_STATUSES,
Expand Down
125 changes: 35 additions & 90 deletions apps/addons/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist

import pyes.exceptions as pyes

import amo
import amo.search
from amo.models import SearchMixin
from addons.cron import reindex_addons
from addons.models import Persona
from bandwagon.cron import reindex_collections
from amo.utils import create_es_index_if_missing
from bandwagon.models import Collection
from compat.cron import compatibility_report
from compat.models import AppCompat
from lib.es.utils import create_index
from users.cron import reindex_users
from users.models import UserProfile
from versions.compare import version_int

Expand Down Expand Up @@ -67,7 +64,7 @@ def extract(addon):
# This would otherwise get attached when by the transformer.
d['weekly_downloads'] = addon.persona.popularity
# Boost on popularity.
d['boost'] = addon.persona.popularity ** .2
d['_boost'] = addon.persona.popularity ** .2
d['has_theme_rereview'] = (
addon.persona.rereviewqueuetheme_set.exists())
except Persona.DoesNotExist:
Expand All @@ -76,10 +73,10 @@ def extract(addon):
else:
# Boost by the number of users on a logarithmic scale. The maximum
# boost (11,000,000 users for adblock) is about 5x.
d['boost'] = addon.average_daily_users ** .2
d['_boost'] = addon.average_daily_users ** .2
# Double the boost if the add-on is public.
if addon.status == amo.STATUS_PUBLIC and 'boost' in d:
d['boost'] = max(d['boost'], 1) * 4
d['_boost'] = max(d['_boost'], 1) * 4

# Indices for each language. languages is a list of locales we want to
# index with analyzer if the string's locale matches.
Expand All @@ -101,50 +98,39 @@ def extract(addon):
return d


def get_alias():
    """Return the ES index name registered under the default alias key."""
    alias_key = SearchMixin.ES_ALIAS_KEY
    return settings.ES_INDEXES.get(alias_key)


def create_new_index(index=None, config=None):
    """Create a fresh ES index holding the addons mappings.

    :param index: name of the index to create; defaults to the index the
        default alias points at (see ``get_alias()``).
    :param config: optional base index configuration dict; its
        ``settings`` and ``mappings`` keys are filled in here.
    """
    # Copy the caller's dict so filling in 'settings'/'mappings' below
    # does not mutate the argument as a side effect.
    config = dict(config) if config else {}
    if index is None:
        index = get_alias()
    config['settings'] = {'index': INDEX_SETTINGS}
    config['mappings'] = get_mappings()
    create_index(index, config)


def get_mappings():
def setup_mapping(index=None, aliased=True):
"""Set up the addons index mapping."""
# Mapping describes how elasticsearch handles a document during indexing.
# Most fields are detected and mapped automatically.
appver = {
'dynamic': False,
'properties': {
'max': {'type': 'long'},
'min': {'type': 'long'}
}
}
appver = {'dynamic': False, 'properties': {'max': {'type': 'long'},
'min': {'type': 'long'}}}
mapping = {
# Optional boosting during indexing.
'_boost': {'name': '_boost', 'null_value': 1.0},
'properties': {
'boost': {'type': 'float', 'null_value': 1.0},
# Turn off analysis on name so we can sort by it.
'name_sort': {'type': 'string', 'index': 'not_analyzed'},
# Adding word-delimiter to split on camelcase and punctuation.
'name': {'type': 'string', 'analyzer': 'standardPlusWordDelimiter'},
'summary': {'type': 'string', 'analyzer': 'snowball'},
'description': {'type': 'string', 'analyzer': 'snowball'},
'tags': {'type': 'string', 'index': 'not_analyzed', 'index_name': 'tag'},
'name': {'type': 'string',
'analyzer': 'standardPlusWordDelimiter'},
'summary': {'type': 'string',
'analyzer': 'snowball'},
'description': {'type': 'string',
'analyzer': 'snowball'},
'tags': {'type': 'string',
'index': 'not_analyzed',
'index_name': 'tag'},
'platforms': {'type': 'integer', 'index_name': 'platform'},
'appversion': {'properties': dict((app.id, appver)
for app in amo.APP_USAGE)},
},
}
# Add room for language-specific indexes.
for analyzer in amo.SEARCH_ANALYZER_MAP:
if (not settings.ES_USE_PLUGINS
and analyzer in amo.SEARCH_ANALYZER_PLUGINS):
log.info('While creating mapping, skipping the %s analyzer' % analyzer)
if (not settings.ES_USE_PLUGINS and
analyzer in amo.SEARCH_ANALYZER_PLUGINS):
log.info('While creating mapping, skipping the %s analyzer'
% analyzer)
continue

mapping['properties']['name_' + analyzer] = {
Expand All @@ -160,55 +146,14 @@ def get_mappings():
'analyzer': analyzer,
}

models = (Addon, AppCompat, Collection, UserProfile)
return dict((m._meta.db_table, mapping) for m in models)


def reindex(index):
indexers = [
reindex_addons, reindex_collections, reindex_users, compatibility_report
]
for indexer in indexers:
log.info('Indexing %r' % indexer.__name__)
es = amo.search.get_es()
# Adjust the mapping for all models at once because fields are shared
# across all doc types in an index. If we forget to adjust one of them
# we'll get burned later on.
for model in Addon, AppCompat, Collection, UserProfile:
index = index or model._get_index()
index = create_es_index_if_missing(index, aliased=aliased)
try:
indexer(index)
except Exception:
# We want to log this event but continue
log.error('Indexer %r failed' % indexer.__name__)


# Index-level analysis settings applied when the addons index is created.
INDEX_SETTINGS = {
    "analysis": {
        "analyzer": {
            # Custom analyzer: standard tokenization plus word splitting,
            # lowercasing, stopword removal and dictionary decompounding,
            # so glued-together add-on names still match partial words.
            "standardPlusWordDelimiter": {
                "tokenizer": "standard",
                "filter": ["standard", "wordDelim", "lowercase", "stop", "dict"]
            }
        },
        "filter": {
            # Split tokens on case changes / punctuation; keep the
            # original token so exact matches keep working.
            "wordDelim": {
                "type": "word_delimiter",
                "preserve_original": True
            },
            # Decompound tokens into the known sub-words below, so
            # compound add-on names are searchable by their parts.
            "dict": {
                "type": "dictionary_decompounder",
                "word_list": [
                    "cool", "iris", "fire", "bug", "flag", "fox", "grease",
                    "monkey", "flash", "block", "forecast", "screen", "grab",
                    "cookie", "auto", "fill", "text", "all", "so", "think",
                    "mega", "upload", "download", "video", "map", "spring",
                    "fix", "input", "clip", "fly", "lang", "up", "down",
                    "persona", "css", "html", "http", "ball", "firefox",
                    "bookmark", "chat", "zilla", "edit", "menu", "menus",
                    "status", "bar", "with", "easy", "sync", "search", "google",
                    "time", "window", "js", "super", "scroll", "title", "close",
                    "undo", "user", "inspect", "inspector", "browser",
                    "context", "dictionary", "mail", "button", "url",
                    "password", "secure", "image", "new", "tab", "delete",
                    "click", "name", "smart", "down", "manager", "open",
                    "query", "net", "link", "blog", "this", "color", "select",
                    "key", "keys", "foxy", "translate", "word", ]
            }
        }
    }
}
es.put_mapping(model._meta.db_table, mapping, index)
except pyes.ElasticSearchException, e:
log.error(e)
3 changes: 3 additions & 0 deletions apps/addons/tests/test_cron.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import os
import datetime

from nose.exc import SkipTest
from nose.tools import eq_
import mock

from django.conf import settings

import amo
import amo.tests
from addons import cron
Expand Down
5 changes: 4 additions & 1 deletion apps/addons/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
BlacklistedSlug, Category, Charity, CompatOverride,
CompatOverrideRange, FrozenAddon,
IncompatibleVersions, Persona, Preview)
from addons.search import setup_mapping
from applications.models import Application, AppVersion
from constants.applications import DEVICE_TYPES
from devhub.models import ActivityLog, AddonLog, RssKey, SubmitStep
Expand Down Expand Up @@ -2180,10 +2181,12 @@ class TestSearchSignals(amo.tests.ESTestCase):

def setUp(self):
super(TestSearchSignals, self).setUp()
setup_mapping()
self.addCleanup(self.cleanup)

def cleanup(self):
self.empty_index('default')
for index in settings.ES_INDEXES.values():
self.es.delete_index_if_exists(index)

def test_no_addons(self):
eq_(Addon.search().count(), 0)
Expand Down
25 changes: 15 additions & 10 deletions apps/amo/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from django.utils import encoding, translation

import caching.base
import elasticsearch
import multidb.pinning
import pyes.exceptions
import queryset_transform

from . import search
Expand Down Expand Up @@ -291,35 +291,40 @@ def update(self, **kw):

class SearchMixin(object):

ES_ALIAS_KEY = 'default'

@classmethod
def _get_index(cls):
indexes = settings.ES_INDEXES
return indexes.get(cls.ES_ALIAS_KEY)
return indexes.get(cls._meta.db_table) or indexes['default']

@classmethod
def index(cls, document, id=None, refresh=False, index=None):
"""Wrapper around Elasticsearch.index."""
def index(cls, document, id=None, bulk=False, index=None):
"""Wrapper around pyes.ES.index."""
search.get_es().index(
body=document, index=index or cls._get_index(),
doc_type=cls.get_mapping_type(), id=id, refresh=refresh)
document, index=index or cls._get_index(),
doc_type=cls._meta.db_table, id=id, bulk=bulk)

@classmethod
def unindex(cls, id, index=None):
es = search.get_es()
try:
es.delete(index or cls._get_index(), cls._meta.db_table, id)
except elasticsearch.TransportError:
except pyes.exceptions.NotFoundException:
# Item wasn't found, whatevs.
pass

@classmethod
def search(cls, index=None):
    """Return an ES query object for this model, bound to *index*
    (falls back to the model's default index when *index* is falsy)."""
    target = index or cls._get_index()
    return search.ES(cls, target)

# For compatibility with elasticutils > v0.5.
# TODO: Remove these when we've moved mkt to its own index.

@classmethod
def get_index(cls):
    """Public alias for ``_get_index()`` (elasticutils compatibility)."""
    return cls._get_index()

@classmethod
def get_mapping_type(cls):
def get_mapping_type_name(cls):
return cls._meta.db_table


Expand Down

0 comments on commit 134c16c

Please sign in to comment.