Skip to content

Commit

Permalink
Revert "Upgrading Elasticsearch (bug 1013874)"
Browse files Browse the repository at this point in the history
This reverts commit a403c60.
  • Loading branch information
yohanboniface committed Sep 8, 2014
1 parent d40c31a commit 134c16c
Show file tree
Hide file tree
Showing 39 changed files with 905 additions and 548 deletions.
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ python:
services:
- memcached
before_install:
- wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.3.2.deb && sudo dpkg -i elasticsearch-1.3.2.deb
- sudo /usr/share/elasticsearch/bin/elasticsearch -d -D es.path.data=/tmp -D es.gateway.type=none -D es.index.store.type=memory -D es.discovery.zen.ping.multicast.enabled=false
- wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.13.deb && sudo dpkg -i elasticsearch-0.90.13.deb
- sudo /usr/share/elasticsearch/bin/plugin -install elasticsearch/elasticsearch-analysis-icu/1.13.0
- sudo /usr/share/elasticsearch/bin/elasticsearch -Des.config=scripts/elasticsearch/elasticsearch.yml
install:
- make update_deps
- pip install --no-deps -r requirements/test.txt --find-links https://pyrepo.addons.mozilla.org/
Expand Down
5 changes: 4 additions & 1 deletion apps/addons/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import amo
from amo.decorators import write
from amo.utils import chunked, walkfiles
from addons import search
from addons.models import Addon, AppSupport, FrozenAddon, Persona
from files.models import File
from lib.es.utils import raise_if_reindex_in_progress
Expand Down Expand Up @@ -473,8 +474,10 @@ def give_personas_versions():


@cronjobs.register
def reindex_addons(index=None, addon_type=None):
def reindex_addons(index=None, aliased=True, addon_type=None):
from . import tasks
# Make sure our mapping is up to date.
search.setup_mapping(index, aliased)
ids = (Addon.objects.values_list('id', flat=True)
.filter(_current_version__isnull=False,
status__in=amo.VALID_STATUSES,
Expand Down
125 changes: 35 additions & 90 deletions apps/addons/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist

import pyes.exceptions as pyes

import amo
import amo.search
from amo.models import SearchMixin
from addons.cron import reindex_addons
from addons.models import Persona
from bandwagon.cron import reindex_collections
from amo.utils import create_es_index_if_missing
from bandwagon.models import Collection
from compat.cron import compatibility_report
from compat.models import AppCompat
from lib.es.utils import create_index
from users.cron import reindex_users
from users.models import UserProfile
from versions.compare import version_int

Expand Down Expand Up @@ -67,7 +64,7 @@ def extract(addon):
# This would otherwise get attached when by the transformer.
d['weekly_downloads'] = addon.persona.popularity
# Boost on popularity.
d['boost'] = addon.persona.popularity ** .2
d['_boost'] = addon.persona.popularity ** .2
d['has_theme_rereview'] = (
addon.persona.rereviewqueuetheme_set.exists())
except Persona.DoesNotExist:
Expand All @@ -76,10 +73,10 @@ def extract(addon):
else:
# Boost by the number of users on a logarithmic scale. The maximum
# boost (11,000,000 users for adblock) is about 5x.
d['boost'] = addon.average_daily_users ** .2
d['_boost'] = addon.average_daily_users ** .2
# Double the boost if the add-on is public.
if addon.status == amo.STATUS_PUBLIC and 'boost' in d:
d['boost'] = max(d['boost'], 1) * 4
d['_boost'] = max(d['_boost'], 1) * 4

# Indices for each language. languages is a list of locales we want to
# index with analyzer if the string's locale matches.
Expand All @@ -101,50 +98,39 @@ def extract(addon):
return d


def get_alias():
    """Return the ES index name registered under the default alias key."""
    alias_key = SearchMixin.ES_ALIAS_KEY
    return settings.ES_INDEXES.get(alias_key)


def create_new_index(index=None, config=None):
    """Create a fresh ES index holding the addons mappings.

    :param index: name of the index to create; defaults to the index the
        default alias points at (see ``get_alias()``).
    :param config: optional base index configuration dict; its
        ``settings`` and ``mappings`` keys are filled in here.
    """
    # Copy the caller's dict so filling in 'settings'/'mappings' below
    # does not mutate the argument as a side effect.
    config = dict(config) if config else {}
    if index is None:
        index = get_alias()
    config['settings'] = {'index': INDEX_SETTINGS}
    config['mappings'] = get_mappings()
    create_index(index, config)


def get_mappings():
def setup_mapping(index=None, aliased=True):
"""Set up the addons index mapping."""
# Mapping describes how elasticsearch handles a document during indexing.
# Most fields are detected and mapped automatically.
appver = {
'dynamic': False,
'properties': {
'max': {'type': 'long'},
'min': {'type': 'long'}
}
}
appver = {'dynamic': False, 'properties': {'max': {'type': 'long'},
'min': {'type': 'long'}}}
mapping = {
# Optional boosting during indexing.
'_boost': {'name': '_boost', 'null_value': 1.0},
'properties': {
'boost': {'type': 'float', 'null_value': 1.0},
# Turn off analysis on name so we can sort by it.
'name_sort': {'type': 'string', 'index': 'not_analyzed'},
# Adding word-delimiter to split on camelcase and punctuation.
'name': {'type': 'string', 'analyzer': 'standardPlusWordDelimiter'},
'summary': {'type': 'string', 'analyzer': 'snowball'},
'description': {'type': 'string', 'analyzer': 'snowball'},
'tags': {'type': 'string', 'index': 'not_analyzed', 'index_name': 'tag'},
'name': {'type': 'string',
'analyzer': 'standardPlusWordDelimiter'},
'summary': {'type': 'string',
'analyzer': 'snowball'},
'description': {'type': 'string',
'analyzer': 'snowball'},
'tags': {'type': 'string',
'index': 'not_analyzed',
'index_name': 'tag'},
'platforms': {'type': 'integer', 'index_name': 'platform'},
'appversion': {'properties': dict((app.id, appver)
for app in amo.APP_USAGE)},
},
}
# Add room for language-specific indexes.
for analyzer in amo.SEARCH_ANALYZER_MAP:
if (not settings.ES_USE_PLUGINS
and analyzer in amo.SEARCH_ANALYZER_PLUGINS):
log.info('While creating mapping, skipping the %s analyzer' % analyzer)
if (not settings.ES_USE_PLUGINS and
analyzer in amo.SEARCH_ANALYZER_PLUGINS):
log.info('While creating mapping, skipping the %s analyzer'
% analyzer)
continue

mapping['properties']['name_' + analyzer] = {
Expand All @@ -160,55 +146,14 @@ def get_mappings():
'analyzer': analyzer,
}

models = (Addon, AppCompat, Collection, UserProfile)
return dict((m._meta.db_table, mapping) for m in models)


def reindex(index):
indexers = [
reindex_addons, reindex_collections, reindex_users, compatibility_report
]
for indexer in indexers:
log.info('Indexing %r' % indexer.__name__)
es = amo.search.get_es()
# Adjust the mapping for all models at once because fields are shared
# across all doc types in an index. If we forget to adjust one of them
# we'll get burned later on.
for model in Addon, AppCompat, Collection, UserProfile:
index = index or model._get_index()
index = create_es_index_if_missing(index, aliased=aliased)
try:
indexer(index)
except Exception:
# We want to log this event but continue
log.error('Indexer %r failed' % indexer.__name__)


# Index-level analysis settings applied when the addons index is created.
INDEX_SETTINGS = {
    "analysis": {
        "analyzer": {
            # Custom analyzer: standard tokenization plus word splitting,
            # lowercasing, stopword removal and dictionary decompounding,
            # so glued-together add-on names still match partial words.
            "standardPlusWordDelimiter": {
                "tokenizer": "standard",
                "filter": ["standard", "wordDelim", "lowercase", "stop", "dict"]
            }
        },
        "filter": {
            # Split tokens on case changes / punctuation; keep the
            # original token so exact matches keep working.
            "wordDelim": {
                "type": "word_delimiter",
                "preserve_original": True
            },
            # Decompound tokens into the known sub-words below, so
            # compound add-on names are searchable by their parts.
            "dict": {
                "type": "dictionary_decompounder",
                "word_list": [
                    "cool", "iris", "fire", "bug", "flag", "fox", "grease",
                    "monkey", "flash", "block", "forecast", "screen", "grab",
                    "cookie", "auto", "fill", "text", "all", "so", "think",
                    "mega", "upload", "download", "video", "map", "spring",
                    "fix", "input", "clip", "fly", "lang", "up", "down",
                    "persona", "css", "html", "http", "ball", "firefox",
                    "bookmark", "chat", "zilla", "edit", "menu", "menus",
                    "status", "bar", "with", "easy", "sync", "search", "google",
                    "time", "window", "js", "super", "scroll", "title", "close",
                    "undo", "user", "inspect", "inspector", "browser",
                    "context", "dictionary", "mail", "button", "url",
                    "password", "secure", "image", "new", "tab", "delete",
                    "click", "name", "smart", "down", "manager", "open",
                    "query", "net", "link", "blog", "this", "color", "select",
                    "key", "keys", "foxy", "translate", "word", ]
            }
        }
    }
}
es.put_mapping(model._meta.db_table, mapping, index)
except pyes.ElasticSearchException, e:
log.error(e)
3 changes: 3 additions & 0 deletions apps/addons/tests/test_cron.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import os
import datetime

from nose.exc import SkipTest
from nose.tools import eq_
import mock

from django.conf import settings

import amo
import amo.tests
from addons import cron
Expand Down
5 changes: 4 additions & 1 deletion apps/addons/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
BlacklistedSlug, Category, Charity, CompatOverride,
CompatOverrideRange, FrozenAddon,
IncompatibleVersions, Persona, Preview)
from addons.search import setup_mapping
from applications.models import Application, AppVersion
from constants.applications import DEVICE_TYPES
from devhub.models import ActivityLog, AddonLog, RssKey, SubmitStep
Expand Down Expand Up @@ -2180,10 +2181,12 @@ class TestSearchSignals(amo.tests.ESTestCase):

def setUp(self):
super(TestSearchSignals, self).setUp()
setup_mapping()
self.addCleanup(self.cleanup)

def cleanup(self):
self.empty_index('default')
for index in settings.ES_INDEXES.values():
self.es.delete_index_if_exists(index)

def test_no_addons(self):
eq_(Addon.search().count(), 0)
Expand Down
25 changes: 15 additions & 10 deletions apps/amo/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from django.utils import encoding, translation

import caching.base
import elasticsearch
import multidb.pinning
import pyes.exceptions
import queryset_transform

from . import search
Expand Down Expand Up @@ -291,35 +291,40 @@ def update(self, **kw):

class SearchMixin(object):

ES_ALIAS_KEY = 'default'

@classmethod
def _get_index(cls):
indexes = settings.ES_INDEXES
return indexes.get(cls.ES_ALIAS_KEY)
return indexes.get(cls._meta.db_table) or indexes['default']

@classmethod
def index(cls, document, id=None, refresh=False, index=None):
"""Wrapper around Elasticsearch.index."""
def index(cls, document, id=None, bulk=False, index=None):
"""Wrapper around pyes.ES.index."""
search.get_es().index(
body=document, index=index or cls._get_index(),
doc_type=cls.get_mapping_type(), id=id, refresh=refresh)
document, index=index or cls._get_index(),
doc_type=cls._meta.db_table, id=id, bulk=bulk)

@classmethod
def unindex(cls, id, index=None):
es = search.get_es()
try:
es.delete(index or cls._get_index(), cls._meta.db_table, id)
except elasticsearch.TransportError:
except pyes.exceptions.NotFoundException:
# Item wasn't found, whatevs.
pass

@classmethod
def search(cls, index=None):
    """Return an ES query object for this model, bound to *index*
    (falls back to the model's default index when *index* is falsy)."""
    target = index or cls._get_index()
    return search.ES(cls, target)

# For compatibility with elasticutils > v0.5.
# TODO: Remove these when we've moved mkt to its own index.

@classmethod
def get_index(cls):
    """Public alias for ``_get_index()`` (elasticutils compatibility)."""
    return cls._get_index()

@classmethod
def get_mapping_type(cls):
def get_mapping_type_name(cls):
return cls._meta.db_table


Expand Down

0 comments on commit 134c16c

Please sign in to comment.