From 11e795fef4dd4f2002aa25cd06be276312bf9c3c Mon Sep 17 00:00:00 2001 From: Jeremy Archer Date: Tue, 31 May 2016 17:50:14 -0500 Subject: [PATCH] Add secondary cache in front of NDB. (#248) * Add secondary cache in front of NDB. * Protect cache endpoint from casual users. --- caravel/controllers/listings.py | 7 +- caravel/model/full_text.py | 19 +++-- caravel/model/utils.py | 82 ++++++++++++++++++++ caravel/tests/test_listings.py | 26 +++++++ caravel/tests/test_listings_cache_expect.txt | 0 cron.yaml | 2 +- 6 files changed, 129 insertions(+), 7 deletions(-) create mode 100644 caravel/model/utils.py create mode 100644 caravel/tests/test_listings_cache_expect.txt diff --git a/caravel/controllers/listings.py b/caravel/controllers/listings.py index 4c19614..3dd96d3 100644 --- a/caravel/controllers/listings.py +++ b/caravel/controllers/listings.py @@ -3,7 +3,7 @@ """ from flask import render_template, request, abort, redirect, url_for, session -from flask import flash +from flask import flash, jsonify from werkzeug.routing import BaseConverter @@ -279,6 +279,11 @@ def edit_listing(listing): return render_template("listing_form.html", form=form) +@app.route("/_internal/cache", methods=["GET"]) +def cache_info(): + return jsonify(model.utils.cache_stats()) + + @app.route("/new", methods=["GET", "POST"]) def new_listing(): """ diff --git a/caravel/model/full_text.py b/caravel/model/full_text.py index 1b416ca..49bc995 100644 --- a/caravel/model/full_text.py +++ b/caravel/model/full_text.py @@ -1,5 +1,6 @@ from google.appengine.ext import ndb from google.appengine.api import memcache +from caravel.model import utils import json import itertools @@ -31,6 +32,11 @@ class FullTextMixin(ndb.Model): keywords = AvoidRereadingFromPBProperty( lambda self: list(set(self._keywords())), repeated=True) + @classmethod + def get_by_id(klass, key_name): + """Avoid the NDB cache since it sux.""" + return utils.get_keys([ndb.Key(klass.__name__, key_name)])[0] + @classmethod def matching(klass, query, offset=0, limit=None): """Returns entities matching the given search query.""" @@ -38,9 +44,11 @@ def matching(klass, query, offset=0, limit=None): # Check contents of cache. words = tokenize(query) if not words: - for item in klass.query().order(-klass.posted_at): - if item._keywords(): - yield item + keys = klass.query().order(-klass.posted_at).iter(keys_only=True) + for keys in grouper(keys, n=12): + for entity in utils.get_keys([key for key in keys if key]): + if entity and entity._keywords(): + yield entity return results = memcache.get_multi(words, key_prefix=FTS) @@ -61,13 +69,13 @@ def matching(klass, query, offset=0, limit=None): matches = (matches & set(keys)) if matches else set(keys) # Write back modified cache. - memcache.set_multi(writeback, key_prefix=FTS, time=(3600*24*7)) + memcache.set_multi(writeback, key_prefix=FTS, time=(3600 * 24 * 7)) # Elide potentially stale entries from the cache. keys = [key for key in keys if key in matches] for keys in grouper(keys, n=12): - for entity in ndb.get_multi([key for key in keys if key]): + for entity in utils.get_keys([key for key in keys if key]): if entity and set(words).issubset(set(entity.keywords)): yield entity @@ -75,6 +83,7 @@ def _post_put_hook(self, future): """Clear the cache of entries maching these keywords.""" memcache.delete_multi(self.keywords, key_prefix=FTS) + utils.invalidate_keys([self.key]) return super(FullTextMixin, self)._post_put_hook(future) diff --git a/caravel/model/utils.py b/caravel/model/utils.py new file mode 100644 index 0000000..f5c2af8 --- /dev/null +++ b/caravel/model/utils.py @@ -0,0 +1,82 @@ +from google.appengine.datastore import entity_pb +from google.appengine.api import memcache +from google.appengine.ext import ndb + + +RTC = "rtc:" +INVALIDATIONS = "stat:inv" +HITS = "stat:hit" +MISSES = "stat:miss" + + +def ndb_entity_to_protobuf(e): + """ + Saves an entity to a protobuf. + """ + if e is None: + return "" + return ndb.ModelAdapter().entity_to_pb(e).Encode() + + +def protobuf_to_ndb_entity(pb): + """ + Reads an entity from a protobuf. + """ + + # precondition: model class must be imported + if pb == "": + return None + return ndb.ModelAdapter().pb_to_entity(entity_pb.EntityProto(pb)) + + +def invalidate_keys(keys): + """ + Removes the given entities from memcache. + """ + + memcache.incr(INVALIDATIONS, delta=len(keys), initial_value=0) + memcache.delete_multi([key.urlsafe() for key in keys], key_prefix=RTC) + + +def get_keys(keys): + """ + Fetch and save the given keys into memcache. + """ + + # Perform the first load from memcache. + urlsafes = [key.urlsafe() for key in keys] + cache = memcache.get_multi(urlsafes, key_prefix=RTC) + results = {} + + missing = [] + for urlsafe, key in zip(urlsafes, keys): + if urlsafe in cache: + results[key] = protobuf_to_ndb_entity(cache[urlsafe]) + else: + missing.append(key) + + # Record cache stats. + memcache.incr(MISSES, delta=len(missing), initial_value=0) + memcache.incr(HITS, delta=len(results), initial_value=0) + + # Fill anything not yet in cache. + if missing: + writeback = {} + for key, entity in zip(missing, ndb.get_multi(missing)): + results[key] = entity + writeback[key.urlsafe()] = ndb_entity_to_protobuf(entity) + memcache.set_multi(writeback, key_prefix=RTC) + + return [results[key] for key in keys] + + +def cache_stats(): + """ + Returns a count of the hits, misses, and invalidations of the current day. + """ + + return { + "invalidations": memcache.get(INVALIDATIONS) or 0, + "hits": memcache.get(HITS) or 0, + "misses": memcache.get(MISSES) or 0, + } diff --git a/caravel/tests/test_listings.py b/caravel/tests/test_listings.py index 7e328a7..4436ebf 100644 --- a/caravel/tests/test_listings.py +++ b/caravel/tests/test_listings.py @@ -1,12 +1,14 @@ from caravel.storage import config from caravel.tests import helper from caravel import model +from caravel.model import utils import unittest import StringIO import time import re import datetime +import json class TestListings(helper.CaravelTestCase): @@ -353,3 +355,27 @@ def test_bump_listing(self): self.listing_a = self.listing_a.key.get() self.assertFalse(self.listing_a.can_bump) self.assertTrue(self.listing_a.age <= datetime.timedelta(seconds=60)) + + def test_cache(self): + # Warm the cache. + self.get("/") + self.get("/listing_a") + self.get("/favicon.ico") + self.get("/apartments.atom") + + # Make sure future reads do not hit the database. + before = utils.cache_stats() + for i in xrange(20): + self.get("/") + self.get("/listing_a") + self.get("/favicon.ico") + self.get("/apartments.atom") + after = utils.cache_stats() + + self.assertEquals(before["invalidations"], after["invalidations"]) + self.assertEquals(before["misses"], after["misses"]) + self.assertGreaterEqual(after["hits"] - before["hits"], 20) + + self.assertEqual( + json.loads(self.get("/_internal/cache").data), after + ) diff --git a/caravel/tests/test_listings_cache_expect.txt b/caravel/tests/test_listings_cache_expect.txt new file mode 100644 index 0000000..e69de29 diff --git a/cron.yaml b/cron.yaml index 4ad97b3..e39ac8a 100644 --- a/cron.yaml +++ b/cron.yaml @@ -4,7 +4,7 @@ cron: schedule: every 24 hours - description: migrate database schema url: /_internal/migrate_schema - schedule: every 20 minutes + schedule: every 2 hours - description: nag moderators url: /_internal/nag_moderators schedule: every day 09:00