# Port used when no -p/--port option is given.
DEFAULT_PORT = 8000


def parse_args(argv):
    """Parse the dul-web command line.

    Usage: dul-web [-p PORT] [GITDIR]

    :param argv: The full argument vector, including the program name.
    :return: A (gitdir, port) tuple. gitdir defaults to the current working
        directory and port defaults to DEFAULT_PORT.
    """
    # Imported here because only the command-line entry point needs it.
    import optparse

    parser = optparse.OptionParser(usage="%prog [-p PORT] [GITDIR]")
    parser.add_option("-p", "--port", dest="port", type="int",
                      default=DEFAULT_PORT,
                      help="TCP port to listen on (default: %default)")
    options, args = parser.parse_args(argv[1:])
    if args:
        gitdir = args[0]
    else:
        gitdir = os.getcwd()
    return gitdir, options.port


def main(argv=None):
    """Serve a single git repository over HTTP until interrupted.

    :param argv: Argument vector to parse; defaults to sys.argv.
    """
    if argv is None:
        argv = sys.argv
    gitdir, port = parse_args(argv)

    backend = GitBackend(Repo(gitdir))
    app = HTTPGitApplication(backend)
    # An empty host string makes the server listen on all interfaces.
    server = make_server('', port, app)
    server.serve_forever()


if __name__ == "__main__":
    main()
    def get_named_file(self, path):
        """Get a file from the control dir with a specific name.

        Although the filename should be interpreted as a filename relative to
        the control dir in a disk-backed Repo, the object returned need not be
        pointing to a file in that location.

        :param path: The path to the file, relative to the control dir. Any
            leading slashes are stripped before it is joined onto the
            control dir.
        :return: An open file object (opened in binary read mode), or None
            if the file does not exist.
        :raise IOError: (or OSError) if opening the file fails for any reason
            other than the file being missing.
        """
        try:
            # Strip leading '/' first: os.path.join() would otherwise treat
            # the path as absolute and discard the control dir entirely.
            return open(os.path.join(self.controldir(), path.lstrip('/')), 'rb')
        except (IOError, OSError), e:
            # Only a missing file is mapped to None; every other failure
            # (permissions, path is a directory, ...) is re-raised unchanged.
            if e.errno == errno.ENOENT:
                return None
            raise
def date_time_string(timestamp=None):
    """Format a timestamp as an HTTP date string.

    Based on BaseHTTPServer.py in python2.5. The result looks like
    'Thu, 01 Jan 1970 00:00:00 GMT'.

    This is a plain module-level function, so it takes no ``self``: callers
    pass the timestamp as the only positional argument.

    :param timestamp: Seconds since the epoch, or None to use the current
        time.
    :return: The formatted date, always expressed in GMT.
    """
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    # Index 0 is a placeholder because struct_time months are 1-based.
    months = [None,
              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timestamp is None:
        timestamp = time.time()
    year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
    # Weekday and month names are spelled out by hand rather than via
    # strftime() so the output never depends on the process locale.
    return '%s, %02d %3s %4d %02d:%02d:%02d GMT' % (
        weekdays[wd], day, months[month], year, hh, mm, ss)
def get_loose_object(req, backend, mat):
    """Serve a single loose object in git's legacy loose-object format.

    :param req: The HTTPGitRequest object to send output to.
    :param backend: The backend whose ``object_store`` is searched.
    :param mat: The regex match for the request path; groups 1 and 2 are the
        2-character and 38-character halves of the object's hex SHA.
    :yield: The serialized object on success; otherwise a short error
        message, after a 404 response has been started.
    """
    sha = mat.group(1) + mat.group(2)
    object_store = backend.object_store
    if not object_store.contains_loose(sha):
        yield req.not_found('Object not found')
        return
    try:
        data = object_store[sha].as_legacy_object()
    except IOError:
        yield req.not_found('Error reading object')
        # Stop here. Falling through would start a second response on top of
        # the 404 and then yield ``data``, which was never assigned.
        return
    # Set the cache headers before respond() so they go out with the 200.
    req.cache_forever()
    req.respond(HTTP_OK, 'application/x-git-loose-object')
    yield data
def handle_service_request(req, backend, mat):
    """Run a smart-HTTP service request (a POST) through its handler.

    The service name is taken from the matched path with the leading slash
    removed, and looked up in the module-level ``services`` table.

    :param req: The HTTPGitRequest object to send output to.
    :param backend: The backend passed through to the protocol handler.
    :param mat: The regex match for the request path.
    :yield: The handler's complete output as one string, or an error message
        after a 403 response has been started for an unknown service.
    """
    service = mat.group().lstrip('/')
    handler_cls = services.get(service, None)
    if handler_cls is None:
        yield req.forbidden('Unsupported service %s' % service)
        # Stop here. Falling through would start a second response on top of
        # the 403 and then try to call None as the handler class.
        return
    req.nocache()
    req.respond(HTTP_OK, 'application/x-%s-response' % service)

    # The handler writes into an in-memory buffer, which is sent in one piece
    # once the handler has finished.
    output = StringIO()
    request_body = req.environ['wsgi.input']
    handler = handler_cls(backend, request_body.read, output.write,
                          stateless_rpc=True)
    handler.handle()
    yield output.getvalue()
class HTTPGitApplication(object):
    """Class encapsulating the state of a git WSGI application.

    :ivar backend: the Backend object backing this application
    """

    # Routing table: (HTTP method, compiled path regex) -> handler callable.
    # Every handler is called as handler(req, backend, match). The patterns
    # are anchored at the end only and applied with search(), so any leading
    # part of the request path is ignored.
    services = {
        ('GET', re.compile('/HEAD$')): get_text_file,
        ('GET', re.compile('/info/refs$')): get_info_refs,
        ('GET', re.compile('/objects/info/alternates$')): get_text_file,
        ('GET', re.compile('/objects/info/http-alternates$')): get_text_file,
        ('GET', re.compile('/objects/info/packs$')): get_info_packs,
        ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')): get_loose_object,
        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')): get_pack_file,
        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')): get_idx_file,

        ('POST', re.compile('/git-upload-pack$')): handle_service_request,
        ('POST', re.compile('/git-receive-pack$')): handle_service_request,
    }

    def __init__(self, backend):
        """Create an application serving the given backend.

        :param backend: the Backend object handed to every request handler.
        """
        self.backend = backend

    def __call__(self, environ, start_response):
        """WSGI entry point: dispatch the request to the matching handler.

        :param environ: the WSGI environment for the request.
        :param start_response: the WSGI start_response callable.
        :return: An iterable of response body strings.
        """
        path = environ['PATH_INFO']
        method = environ['REQUEST_METHOD']
        req = HTTPGitRequest(environ, start_response)
        # environ['QUERY_STRING'] has qs args
        for (smethod, spath), handler in self.services.iteritems():
            if smethod != method:
                continue
            mat = spath.search(path)
            if mat:
                return handler(req, self.backend, mat)
        # Wrap the message in a list: a bare string is itself an iterable of
        # one-character strings, so the server would send the body one
        # character at a time.
        return [req.not_found('Sorry, that method is not supported')]