Skip to content

Commit

Permalink
merge support for use with in-memory packs.
Browse files Browse the repository at this point in the history
  • Loading branch information
jelmer committed Feb 12, 2010
2 parents fb1513b + 17063a6 commit 6196883
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 68 deletions.
148 changes: 81 additions & 67 deletions dulwich/pack.py
Expand Up @@ -72,50 +72,45 @@
(sys.version_info[0] == 2 and sys.version_info[1] >= 6))


def take_msb_bytes(read):
    """Read a run of continuation bytes from a read callable.

    Bytes are pulled one at a time; a set most-significant bit (0x80)
    means another byte follows, a clear one terminates the run.

    :param read: Read function
    :return: List of the byte values read, as integers
    """
    result = []
    more = True
    while more:
        value = ord(read(1))
        result.append(value)
        more = bool(value & 0x80)
    return result


def read_zlib_chunks(read, buffer_size=4096):
    """Read chunks of zlib data from a read callable.

    :param read: Read function
    :param buffer_size: Number of compressed bytes to request per read
    :return: Tuple with list of decompressed chunks and length of the
        compressed data that was consumed
    :raises zlib.error: If the stream ends before the zlib data is complete
    """
    obj = zlib.decompressobj()
    ret = []
    fed = 0
    seen_empty_read = False
    # Bytes literals (b"" == "" on Python 2) so this also works when *read*
    # yields binary data under Python 3.
    while obj.unused_data == b"":
        add = read(buffer_size)
        if len(add) < buffer_size:
            if not add:
                if seen_empty_read:
                    # Two consecutive empty reads: the input is truncated and
                    # the loop would otherwise never terminate.
                    raise zlib.error("EOF before end of zlib stream")
                seen_empty_read = True
            # Pad with a junk byte so a stream ending exactly at the read
            # boundary still yields non-empty unused_data, terminating the
            # loop; the pad is subtracted back out via unused_data below.
            add += b"Z"
        fed += len(add)
        ret.append(obj.decompress(add))
    comp_len = fed - len(obj.unused_data)
    return ret, comp_len


def read_zlib(read, dec_size):
    """Read zlib-compressed data from a read callable.

    :param read: Read function
    :param dec_size: Expected size of the decompressed data
    :return: Tuple of the uncompressed data and the compressed length consumed
    :raises AssertionError: If the decompressed size does not match *dec_size*
    """
    ret, comp_len = read_zlib_chunks(read)
    # b"".join (identical to "".join on Python 2) so binary chunks join
    # correctly under Python 3 as well.
    x = b"".join(ret)
    if len(x) != dec_size:
        # Explicit raise (same exception type as the old assert) so the
        # check survives running under python -O.
        raise AssertionError(
            "decompressed size mismatch: expected %d, got %d" %
            (dec_size, len(x)))
    return x, comp_len
Expand All @@ -133,35 +128,31 @@ def iter_sha1(iter):
return sha1.hexdigest()


def load_pack_index(path):
    """Load an index file by path.

    :param path: Path to the index file
    :return: A PackIndex loaded from the given path
    """
    f = GitFile(path, 'rb')
    # NOTE(review): if load_pack_index_file raises, *f* is never closed;
    # on success the returned index object takes ownership of the file.
    return load_pack_index_file(path, f)


def load_pack_index_file(path, f):
    """Load an index file from a file-like object.

    The version is sniffed from the first four bytes; the file is rewound
    before being handed to the index class.

    :param path: Path for the index file
    :param f: File-like object
    :return: A PackIndex instance of the detected version
    :raises KeyError: If the header advertises an unknown index version
    """
    magic = f.read(4)
    if magic != '\377tOc':
        # No v2 magic: version 1 indexes have no header at all.
        f.seek(0)
        return PackIndex1(path, file=f)
    (version,) = struct.unpack(">L", f.read(4))
    if version != 2:
        raise KeyError("Unknown pack index format %d" % version)
    f.seek(0)
    return PackIndex2(path, file=f)


def bisect_find_sha(start, end, sha, unpack_name):
Expand Down Expand Up @@ -201,7 +192,7 @@ class PackIndex(object):
the start and end offset and then bisect in to find if the value is present.
"""

def __init__(self, filename, file=None):
def __init__(self, filename, file=None, size=None):
"""Create a pack index object.
Provide it with the name of the index file to consider, and it will map
Expand All @@ -210,13 +201,23 @@ def __init__(self, filename, file=None):
self._filename = filename
# Take the size now, so it can be checked each time we map the file to
# ensure that it hasn't changed.
self._size = os.path.getsize(filename)
if file is None:
self._file = GitFile(filename, 'rb')
else:
self._file = file
self._contents, map_offset = simple_mmap(self._file, 0, self._size)
assert map_offset == 0
fileno = getattr(self._file, 'fileno', None)
if fileno is not None:
fd = self._file.fileno()
if size is None:
self._size = os.fstat(fd).st_size
else:
self._size = size
self._contents = mmap.mmap(fd, self._size,
access=mmap.ACCESS_READ)
else:
self._file.seek(0)
self._contents = self._file.read()
self._size = len(self._contents)

def __eq__(self, other):
if not isinstance(other, PackIndex):
Expand Down Expand Up @@ -347,8 +348,8 @@ def _object_index(self, sha):
class PackIndex1(PackIndex):
"""Version 1 Pack Index."""

def __init__(self, filename, file=None):
PackIndex.__init__(self, filename, file)
    def __init__(self, filename, file=None, size=None):
        """Create a version 1 pack index.

        :param filename: Path to the index file
        :param file: Optional file-like object (passed through to PackIndex)
        :param size: Optional size of the index data (passed through)
        """
        PackIndex.__init__(self, filename, file, size)
        self.version = 1
        # v1 indexes have no header: the fan-out table starts at offset 0.
        self._fan_out_table = self._read_fan_out_table(0)

Expand All @@ -373,8 +374,8 @@ def _unpack_crc32_checksum(self, i):
class PackIndex2(PackIndex):
"""Version 2 Pack Index."""

def __init__(self, filename, file=None):
PackIndex.__init__(self, filename, file)
def __init__(self, filename, file=None, size=None):
PackIndex.__init__(self, filename, file, size)
assert self._contents[:4] == '\377tOc', "Not a v2 pack index file"
(self.version, ) = unpack_from(">L", self._contents, 4)
assert self.version == 2, "Version was %d" % self.version
Expand Down Expand Up @@ -414,36 +415,37 @@ def read_pack_header(f):
return (version, num_objects)


def unpack_object(read):
    """Unpack a Git object.

    :param read: Read function
    :return: tuple with type, uncompressed data and compressed size
    """
    # Object header: type lives in bits 4-6 of the first byte, the size in
    # the low 4 bits plus 7 bits per continuation byte.
    head = take_msb_bytes(read)
    obj_type = (head[0] >> 4) & 0x07
    size = head[0] & 0x0f
    for i, byte in enumerate(head[1:]):
        size += (byte & 0x7f) << ((i * 7) + 4)
    raw_base = len(head)
    if obj_type == 6:  # offset delta
        disp = take_msb_bytes(read)
        raw_base += len(disp)
        assert not (disp[-1] & 0x80)
        delta_base_offset = disp[0] & 0x7f
        for byte in disp[1:]:
            delta_base_offset += 1
            delta_base_offset <<= 7
            delta_base_offset += (byte & 0x7f)
        uncomp, comp_len = read_zlib(read, size)
        assert size == len(uncomp)
        return obj_type, (delta_base_offset, uncomp), comp_len + raw_base
    elif obj_type == 7:  # ref delta
        # BUG FIX: the original read the 20-byte base object name via
        # `map.read(20)`, but `map` is no longer a parameter of this
        # function (it resolves to the builtin) -- the name must come from
        # the same *read* callable as everything else.
        basename = read(20)
        raw_base += 20
        uncomp, comp_len = read_zlib(read, size)
        assert size == len(uncomp)
        return obj_type, (basename, uncomp), comp_len + raw_base
    else:
        uncomp, comp_len = read_zlib(read, size)
        assert len(uncomp) == size
        return obj_type, uncomp, comp_len + raw_base

Expand Down Expand Up @@ -484,7 +486,7 @@ class PackData(object):
It will all just throw a zlib or KeyError.
"""

def __init__(self, filename):
def __init__(self, filename, file=None, size=None):
"""Create a PackData object that represents the pack in the given filename.
The file must exist and stay readable until the object is disposed of. It
Expand All @@ -494,14 +496,28 @@ def __init__(self, filename):
mmap implementation is flawed.
"""
self._filename = filename
self._size = os.path.getsize(filename)
if size is None:
self._size = os.path.getsize(filename)
else:
self._size = size
self._header_size = 12
assert self._size >= self._header_size, "%s is too small for a packfile (%d < %d)" % (filename, self._size, self._header_size)
self._file = GitFile(self._filename, 'rb')
if file is None:
self._file = GitFile(self._filename, 'rb')
else:
self._file = file
self._read_header()
self._offset_cache = LRUSizeCache(1024*1024*20,
compute_size=_compute_object_size)

    @classmethod
    def from_file(cls, file, size):
        # Alternate constructor for a pack backed by a file-like object.
        # *size* is mandatory because the object may not exist on disk.
        # NOTE(review): str(file) is used as the filename stand-in for
        # in-memory packs -- presumably only surfaced in error messages;
        # verify against callers.
        return cls(str(file), file=file, size=size)

    @classmethod
    def from_path(cls, path):
        # Alternate constructor for a pack stored on disk at *path*.
        return cls(filename=path)

    def close(self):
        """Close the underlying pack file."""
        self._file.close()

Expand All @@ -519,11 +535,14 @@ def calculate_checksum(self):
:return: 20-byte binary SHA1 digest
"""
map, map_offset = simple_mmap(self._file, 0, self._size - 20)
try:
return make_sha(map[map_offset:self._size-20]).digest()
finally:
map.close()
s = make_sha()
self._file.seek(0)
todo = self._size - 20
while todo > 0:
x = self._file.read(min(todo, 1<<16))
s.update(x)
todo -= len(x)
return s.digest()

def resolve_object(self, offset, type, obj, get_ref, get_offset=None):
"""Resolve an object, possibly resolving deltas when necessary.
Expand Down Expand Up @@ -566,10 +585,7 @@ def __init__(self, pack):
self.i = 0
self.offset = pack._header_size
self.num = len(pack)
self.map, _ = simple_mmap(pack._file, 0, pack._size)

def __del__(self):
self.map.close()
self.map = pack._file

    def __iter__(self):
        """Iterator protocol: this object is its own iterator."""
        return self
Expand All @@ -580,8 +596,10 @@ def __len__(self):
def next(self):
if self.i == self.num:
raise StopIteration
(type, obj, total_size) = unpack_object(self.map, self.offset)
crc32 = zlib.crc32(self.map[self.offset:self.offset+total_size]) & 0xffffffff
self.map.seek(self.offset)
(type, obj, total_size) = unpack_object(self.map.read)
self.map.seek(self.offset)
crc32 = zlib.crc32(self.map.read(total_size)) & 0xffffffff
ret = (self.offset, type, obj, crc32)
self.offset += total_size
if progress:
Expand Down Expand Up @@ -705,12 +723,8 @@ def get_object_at(self, offset):
assert isinstance(offset, long) or isinstance(offset, int),\
"offset was %r" % offset
assert offset >= self._header_size
map, map_offset = simple_mmap(self._file, offset, self._size-offset)
try:
ret = unpack_object(map, map_offset)[:2]
return ret
finally:
map.close()
self._file.seek(offset)
return unpack_object(self._file.read)[:2]


class SHA1Reader(object):
Expand Down
3 changes: 2 additions & 1 deletion dulwich/tests/test_pack.py
Expand Up @@ -21,6 +21,7 @@
"""Tests for Dulwich packs."""


from cStringIO import StringIO
import os
import unittest

Expand Down Expand Up @@ -283,5 +284,5 @@ class ZlibTests(unittest.TestCase):

def test_simple_decompress(self):
self.assertEquals(("tree 4ada885c9196b6b6fa08744b5862bf92896fc002\nparent None\nauthor Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\ncommitter Jelmer Vernooij <jelmer@samba.org> 1228980214 +0000\n\nProvide replacement for mmap()'s offset argument.", 158),
read_zlib(TEST_COMP1, 0, 229))
read_zlib(StringIO(TEST_COMP1).read, 229))

0 comments on commit 6196883

Please sign in to comment.