# Patch header (kept verbatim for provenance):
#   diff --git a/AcceleratedHTTPCacheManager.py b/AcceleratedHTTPCacheManager.py
#   new file mode 100644
#   index 0000000..c3f7c76
#   --- /dev/null
#   +++ b/AcceleratedHTTPCacheManager.py
#   @@ -0,0 +1,267 @@
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
'''
Accelerated HTTP cache manager --
  Adds caching headers to the response so that downstream caches will
  cache according to a common policy.

$Id$
'''

from OFS.Cache import Cache, CacheManager
from OFS.SimpleItem import SimpleItem
import logging
import socket
import time
import Globals
from Globals import DTMLFile
import urlparse, httplib
from cgi import escape
from urllib import quote
from App.Common import rfc1123_date


logger = logging.getLogger('Zope.AcceleratedHTTPCacheManager')


class AcceleratedHTTPCache (Cache):
    # Cache implementation that stores no data itself: it sets HTTP caching
    # headers on responses (ZCache_set), sends PURGE requests to the
    # configured accelerators (ZCache_invalidate) and keeps hit counters.
    #
    # Note the need to take thread safety into account.
    # Also note that objects of this class are not persistent,
    # nor do they use acquisition.

    # Called with a host string to open a connection to an accelerator.
    # Kept as a class attribute so that it can be replaced (e.g. by a mock).
    connection_factory = httplib.HTTPConnection

    def __init__(self):
        # Maps an object's physical path (a tuple) to a two-item list:
        # [hits by anonymous users, hits by authenticated users].
        self.hit_counts = {}

    def initSettings(self, kw):
        # Copies every item of the mapping ``kw`` onto this instance as an
        # attribute; the methods below read ``anonymous_only``, ``interval``
        # and ``notify_urls``.
        # Note that we lazily allow AcceleratedHTTPCacheManager
        # to verify the correctness of the internal settings.
        self.__dict__.update(kw)

    def ZCache_invalidate(self, ob):
        # Forget the hit counts of ``ob`` and send a PURGE request for it to
        # every accelerator listed in ``self.notify_urls``.  Returns a string
        # listing the "status reason" of each response received.
        #
        # Note that this only works for default views of objects at
        # their canonical path. If an object is viewed and cached at
        # any other path via acquisition or virtual hosting, that
        # cache entry cannot be purged because there is an infinite
        # number of such possible paths, and Squid does not support
        # any kind of fuzzy purging; we have to specify exactly the
        # URL to purge. So we try to purge the known paths most
        # likely to turn up in practice: the physical path and the
        # current absolute_url_path. Any of those can be
        # wrong in some circumstances, but it may be the best we can
        # do :-(
        # It would be nice if Squid's purge feature was better
        # documented. (pot! kettle! black!)

        phys_path = ob.getPhysicalPath()
        self.hit_counts.pop(phys_path, None)
        purge_paths = (ob.absolute_url_path(), quote('/'.join(phys_path)))
        # Don't purge the same path twice.
        if purge_paths[0] == purge_paths[1]:
            purge_paths = purge_paths[:1]
        results = []
        for url in self.notify_urls:
            if not url.strip():
                # Skip blank entries in the configured list.
                continue
            # Send the PURGE request to each HTTP accelerator.
            if url[:7].lower() == 'http://':
                u = url
            else:
                u = 'http://' + url
            # Only the network location and the path of the URL are used.
            host, path = urlparse.urlparse(u)[1:3]
            if path.lower().startswith('/http://'):
                # The path itself starts with an absolute URL: drop the
                # leading slash(es) so the request target begins 'http://'.
                path = path.lstrip('/')
            for ob_path in purge_paths:
                p = path.rstrip('/') + ob_path
                h = self.connection_factory(host)
                logger.debug('PURGING host %s, path %s', host, p)
                # An exception on one purge should not prevent the others.
                # NOTE(review): only socket.gaierror is handled here; any
                # other error raised by request() or getresponse() still
                # propagates to the caller.
                try:
                    h.request('PURGE', p)
                    # This better not hang. I wish httplib gave us
                    # control of timeouts.
                except socket.gaierror:
                    logger.error('socket.gaierror: maybe the server '
                                 'at %s is down, or the cache manager '
                                 'is misconfigured?', url)
                    continue
                r = h.getresponse()
                status = '%s %s' % (r.status, r.reason)
                results.append(status)
                logger.debug('purge response: %s', status)
        return 'Server response(s): ' + ';'.join(results)

    def ZCache_get(self, ob, view_name, keywords, mtime_func, default):
        # Nothing is ever stored by this cache, so always hand back
        # ``default``.
        return default

    def ZCache_set(self, ob, data, view_name, keywords, mtime_func):
        # Count the hit for ``ob`` and, unless the request is authenticated
        # while ``anonymous_only`` is set, put Last-Modified, Cache-Control
        # and Expires headers on the response.  ``data`` is not stored.
        #
        # Note the blatant ignorance of view_name and keywords.
        # Standard HTTP accelerators are not able to make use of this
        # data. mtime_func is also ignored because using "now" for
        # Last-Modified is as good as using any time in the past.
        REQUEST = ob.REQUEST
        RESPONSE = REQUEST.RESPONSE
        anon = 1
        u = REQUEST.get('AUTHENTICATED_USER', None)
        if u is not None:
            if u.getUserName() != 'Anonymous User':
                anon = 0
        # hits is [anonymous count, authenticated count]; created on the
        # first hit for this physical path.
        hits = self.hit_counts.setdefault(ob.getPhysicalPath(), [0, 0])
        if anon:
            hits[0] = hits[0] + 1
        else:
            hits[1] = hits[1] + 1

        if not anon and self.anonymous_only:
            return
        # Set HTTP Expires and Cache-Control headers
        seconds = self.interval
        now = time.time()
        RESPONSE.setHeader('Last-Modified', rfc1123_date(now))
        RESPONSE.setHeader('Cache-Control', 'max-age=%d' % seconds)
        RESPONSE.setHeader('Expires', rfc1123_date(now + seconds))


# Module-level registry of AcceleratedHTTPCache objects, keyed by the cache
# id that each manager generates in its __init__.
caches = {}
# Second-to-last component of this module's dotted name; used as the first
# element of the 'help' entries in manage_options.
PRODUCT_DIR = __name__.split('.')[-2]


class AcceleratedHTTPCacheManager (CacheManager, SimpleItem):
    '''Cache manager holding the settings for an AcceleratedHTTPCache.

    The settings are kept in ``self._settings`` (keys ``anonymous_only``,
    ``interval`` and ``notify_urls``) and pushed into the cache object
    returned by ZCacheManager_getCache.
    '''

    __ac_permissions__ = (
        ('View management screens', ('getSettings',
                                     'manage_main',
                                     'manage_stats',
                                     'getCacheReport',
                                     'sort_link')),
        ('Change cache managers', ('manage_editProps',), ('Manager',)),
        )

    manage_options = (
        {'label':'Properties', 'action':'manage_main',
         'help':(PRODUCT_DIR, 'Accel.stx'),},
        {'label':'Statistics', 'action':'manage_stats',
         'help':(PRODUCT_DIR, 'Accel.stx'),},
        ) + CacheManager.manage_options + SimpleItem.manage_options

    meta_type = 'Accelerated HTTP Cache Manager'

    def __init__(self, ob_id):
        # Start with default settings: anonymous-only caching, a 3600 second
        # interval and no accelerators to notify.
        self.id = ob_id
        self.title = ''
        self._settings = {'anonymous_only':1,
                          'interval':3600,
                          'notify_urls':()}
        # Key under which this manager's cache is stored in ``caches``.
        self.__cacheid = '%s_%f' % (id(self), time.time())

    def getId(self):
        '''Return the id this manager was created with.'''
        return self.id

    ZCacheManager_getCache__roles__ = ()
    def ZCacheManager_getCache(self):
        # Return the cache registered under this manager's cache id,
        # creating it and loading the current settings on first use.
        cacheid = self.__cacheid
        try:
            return caches[cacheid]
        except KeyError:
            cache = AcceleratedHTTPCache()
            cache.initSettings(self._settings)
            caches[cacheid] = cache
            return cache

    def getSettings(self):
        '''Return a copy of the settings dictionary.'''
        return self._settings.copy()  # Don't let UI modify it.

    manage_main = DTMLFile('dtml/propsAccel', globals())

    def manage_editProps(self, title, settings=None, REQUEST=None):
        '''Change the title and the cache settings.

        ``settings`` is a mapping providing ``interval`` and
        ``notify_urls`` and optionally ``anonymous_only``; when it is None
        the values are read from REQUEST instead.  The new settings are
        also pushed into the cache object.  When REQUEST is given, the
        rendered properties page is returned.
        '''
        if settings is None:
            settings = REQUEST
        self.title = str(title)
        self._settings = {
            'anonymous_only':settings.get('anonymous_only') and 1 or 0,
            'interval':int(settings['interval']),
            'notify_urls':tuple(settings['notify_urls']),}
        cache = self.ZCacheManager_getCache()
        cache.initSettings(self._settings)
        if REQUEST is not None:
            return self.manage_main(
                self, REQUEST, manage_tabs_message='Properties changed.')

    manage_stats = DTMLFile('dtml/statsAccel', globals())

    def _getSortInfo(self):
        """
        Returns the value of sort_by and sort_reverse.
        If not found, returns default values ('anon' and 1).
        """
        req = self.REQUEST
        sort_by = req.get('sort_by', 'anon')
        sort_reverse = int(req.get('sort_reverse', 1))
        return sort_by, sort_reverse

    def getCacheReport(self):
        """
        Returns the list of objects in the cache, sorted according to
        the user's preferences.

        Each item is a dictionary with the keys 'path' (the physical
        path joined with '/'), 'anon' and 'auth' (the two hit counts).
        """
        sort_by, sort_reverse = self._getSortInfo()
        c = self.ZCacheManager_getCache()
        rval = []
        for path, (anon, auth) in c.hit_counts.items():
            rval.append({'path': '/'.join(path),
                         'anon': anon,
                         'auth': auth})
        if sort_by:
            # sort_by comes from the request and is used as a dictionary
            # key; a value other than 'path', 'anon' or 'auth' raises
            # KeyError here.
            rval.sort(lambda e1, e2, sort_by=sort_by:
                      cmp(e1[sort_by], e2[sort_by]))
            if sort_reverse:
                rval.reverse()
        return rval

    def sort_link(self, name, id):
        """
        Utility for generating a sort link.

        Returns an HTML anchor labelled ``name`` that points at
        manage_stats sorted by the column ``id``.  Following the link for
        the column that is already the sort column flips sort_reverse;
        for any other column sort_reverse is 0.
        """
        # XXX This ought to be in a library or something.
        sort_by, sort_reverse = self._getSortInfo()
        url = self.absolute_url() + '/manage_stats?sort_by=' + id
        newsr = 0
        if sort_by == id:
            newsr = not sort_reverse
        url = url + '&sort_reverse=' + (newsr and '1' or '0')
        # Two values need two placeholders: the URL goes into the href
        # attribute (escaped with quote=1 so '"' is safe there) and the
        # name becomes the link text.
        return '<a href="%s">%s</a>' % (escape(url, 1), escape(name))


Globals.default__class_init__(AcceleratedHTTPCacheManager)


manage_addAcceleratedHTTPCacheManagerForm = DTMLFile('dtml/addAccel',
                                                     globals())

def manage_addAcceleratedHTTPCacheManager(self, id, REQUEST=None):
    '''Add an AcceleratedHTTPCacheManager with the given id to ``self``.

    When REQUEST is given, the container's manage_main page is returned.
    '''
    self._setObject(id, AcceleratedHTTPCacheManager(id))
    if REQUEST is not None:
        return self.manage_main(self, REQUEST)

# FYI good resource: http://www.web-caching.com/proxy-caches.html


# Patch header (kept verbatim for provenance):
#   diff --git a/tests/test_AcceleratedHTTPCacheManager.py b/tests/test_AcceleratedHTTPCacheManager.py
#   new file mode 100644
#   index 0000000..dac1ea9
#   --- /dev/null
#   +++ b/tests/test_AcceleratedHTTPCacheManager.py
#   @@ -0,0 +1,154 @@
##############################################################################
#
# Copyright (c) 2005 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
""" Unit tests for AcceleratedCacheManager module.

$Id$
"""

import unittest
from Products.StandardCacheManagers.AcceleratedHTTPCacheManager \
     import AcceleratedHTTPCache, AcceleratedHTTPCacheManager


class DummyObject:
    # Stand-in for a cacheable object: it only knows its physical path and
    # its URL path, which is all that ZCache_invalidate asks for.

    def __init__(self, path='/path/to/object', urlpath=None):
        # The URL path falls back to the physical path when not given.
        if urlpath is None:
            urlpath = path
        self.path = path
        self.urlpath = urlpath

    def getPhysicalPath(self):
        segments = self.path.split('/')
        return tuple(segments)

    def absolute_url_path(self):
        return self.urlpath


class MockResponse:
    # Canned response handed out by the mock connection.
    status = '200'
    reason = "who knows, I'm just a mock"


def MockConnectionClassFactory():
    # Builds a fresh mock of HTTPConnection together with the list in
    # which every instance of that mock records the requests it is asked
    # to make.  Returns the pair (class, list).
    request_log = []

    class MockConnection:
        # Minimal replacement for httplib.HTTPConnection.
        def __init__(self, host):
            self.host = host
            self.request_log = request_log

        def request(self, method, path):
            # Record the request instead of sending anything.
            entry = {'method': method, 'host': self.host, 'path': path}
            self.request_log.append(entry)

        def getresponse(self):
            return MockResponse()

    return MockConnection, request_log


class AcceleratedHTTPCacheTests(unittest.TestCase):

    def _getTargetClass(self):
        return AcceleratedHTTPCache

    def _makeOne(self, *args, **kw):
        klass = self._getTargetClass()
        return klass(*args, **kw)

    def test_PURGE_passes_Host_header(self):
        accelerator = 'localhost:1888'
        cache = self._makeOne()
        cache.notify_urls = ['http://%s' % accelerator]
        cache.connection_factory, log = MockConnectionClassFactory()
        target = DummyObject()
        cache.ZCache_invalidate(target)
        # URL path and physical path coincide, so exactly one PURGE.
        self.assertEqual(len(log), 1)
        purge = log[-1]
        self.assertEqual(purge['method'], 'PURGE')
        self.assertEqual(purge['host'], accelerator)
        self.assertEqual(purge['path'], target.path)

    def test_multiple_notify(self):
        cache = self._makeOne()
        cache.notify_urls = ['http://foo', 'bar', 'http://baz/bat']
        cache.connection_factory, log = MockConnectionClassFactory()
        cache.ZCache_invalidate(DummyObject())
        self.assertEqual(len(log), 3)
        # One request per accelerator, in configuration order.
        for index, expected_host in enumerate(('foo', 'bar', 'baz')):
            self.assertEqual(log[index]['host'], expected_host)
        cache.ZCache_invalidate(DummyObject())
        self.assertEqual(len(log), 6)

    def test_vhost_purging_1447(self):
        # Test for http://www.zope.org/Collectors/Zope/1447
        cache = self._makeOne()
        cache.notify_urls = ['http://foo.com']
        cache.connection_factory, log = MockConnectionClassFactory()
        target = DummyObject(urlpath='/published/elsewhere')
        cache.ZCache_invalidate(target)
        # That should fire off two invalidations,
        # one for the physical path and one for the abs. url path.
        self.assertEqual(len(log), 2)
        first, second = log[0], log[1]
        self.assertEqual(first['path'], target.absolute_url_path())
        self.assertEqual(second['path'], target.path)


class CacheManagerTests(unittest.TestCase):

    def _getTargetClass(self):
        return AcceleratedHTTPCacheManager

    def _makeOne(self, *args, **kw):
        klass = self._getTargetClass()
        return klass(*args, **kw)

    def _makeContext(self):
        # Builds a root folder containing one cache manager and returns
        # (root, manager as fetched back from the folder).
        from OFS.Folder import Folder
        cm_id = 'http_cache'
        root = Folder()
        root.getPhysicalPath = lambda: ('', 'some_path',)
        root._setObject(cm_id, self._makeOne(cm_id))
        return root, root[cm_id]

    def test_add(self):
        # ensure __init__ doesn't raise errors.
        folder, manager = self._makeContext()

    def test_ZCacheManager_getCache(self):
        folder, manager = self._makeContext()
        cache = manager.ZCacheManager_getCache()
        self.assert_(isinstance(cache, AcceleratedHTTPCache))

    def test_getSettings(self):
        folder, manager = self._makeContext()
        settings = manager.getSettings()
        for key in ('anonymous_only', 'interval', 'notify_urls'):
            self.assert_(key in settings.keys())


def test_suite():
    # Collects both test cases into a single suite.
    suite = unittest.TestSuite()
    for case in (AcceleratedHTTPCacheTests, CacheManagerTests):
        suite.addTest(unittest.makeSuite(case))
    return suite

if __name__ == '__main__':
    unittest.main(defaultTest='test_suite')