##############################################################################
#
# Copyright 2011 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Python BTree implementation
"""
from struct import Struct
from struct import error as struct_error
from operator import index
from persistent import Persistent
from .Interfaces import BTreesConflictError
from ._compat import PY3
from ._compat import compare
from ._compat import int_types
from ._compat import xrange
_marker = object()
class _Base(Persistent):
__slots__ = ()
_key_type = list
def __init__(self, items=None):
self.clear()
if items:
self.update(items)
try:
# Detect the presence of the C extensions.
# If they're NOT around, we don't need to do any of the
# special pickle support to make Python versions look like
# C---we just rename the classes. By not defining these methods,
# we can (theoretically) avoid a bit of a slowdown.
# If the C extensions are around, we do need these methods, but
# these classes are unlikely to be used in production anyway.
__import__('BTrees._OOBTree')
except ImportError: # pragma: no cover
pass
else:
def __reduce__(self):
# Swap out the type constructor for the C version, if present.
func, typ_gna, state = Persistent.__reduce__(self)
# We ignore the returned type altogether in favor of
# our calculated class (which allows subclasses but replaces our exact
# type with the C equivalent)
typ = self.__class__
gna = typ_gna[1:]
return (func, (typ,) + gna, state)
@property
def __class__(self):
type_self = type(self)
return type_self._BTree_reduce_as if type_self._BTree_reduce_up_bound is type_self else type_self
@property
def _BTree_reduce_as(self):
# Return the pickle replacement class for this object.
# If the C extensions are available, this will be the
# C type (setup by _fix_pickle), otherwise it will be the real
# type of this object.
# This implementation is replaced by _fix_pickle and exists for
# testing purposes.
return type(self) # pragma: no cover
_BTree_reduce_up_bound = _BTree_reduce_as
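# Editorial illustration (not part of the original module): a minimal sketch
# of the pickle aliasing arranged above, assuming BTrees.OOBTree is
# importable. When the C extensions are present, __reduce__ and the
# __class__ property make a pure-Python instance pickle under the C class
# name, so either implementation can load the result.
def _example_pickle_aliasing():
    import pickle
    from BTrees.OOBTree import OOBTreePy
    tree = OOBTreePy()
    tree['a'] = 1
    data = pickle.dumps(tree)
    # With the C extensions installed the pickle references OOBTree rather
    # than OOBTreePy, because _BTree_reduce_as is swapped in by _fix_pickle.
    return pickle.loads(data)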
class _BucketBase(_Base):
__slots__ = ('_keys',
'_next',
'_to_key',
)
def clear(self):
self._keys = self._key_type()
self._next = None
def __len__(self):
return len(self._keys)
@property
def size(self):
return len(self._keys)
def _deleteNextBucket(self):
next = self._next
if next is not None:
self._next = next._next
def _search(self, key):
# Return non-negative index on success
# return -(insertion_index + 1) on fail
low = 0
keys = self._keys
high = len(keys)
while low < high:
i = (low + high) // 2
k = keys[i]
if k is key or k == key:
return i
if compare(k, key) < 0:
low = i + 1
else:
high = i
return -1 - low
def minKey(self, key=_marker):
if key is _marker or key is None:
return self._keys[0]
key = self._to_key(key)
index = self._search(key)
if index >= 0:
return key
index = -index - 1
if index < len(self._keys):
return self._keys[index]
else:
raise ValueError("no key satisfies the conditions")
def maxKey(self, key=_marker):
if key is _marker or key is None:
return self._keys[-1]
key = self._to_key(key)
index = self._search(key)
if index >= 0:
return key
else:
index = -index-1
if index:
return self._keys[index-1]
else:
raise ValueError("no key satisfies the conditions")
def _range(self, min=_marker, max=_marker,
excludemin=False, excludemax=False):
if min is _marker or min is None:
start = 0
if excludemin:
start = 1
else:
min = self._to_key(min)
start = self._search(min)
if start >= 0:
if excludemin:
start += 1
else:
start = -start - 1
if max is _marker or max is None:
end = len(self._keys)
if excludemax:
end -= 1
else:
max = self._to_key(max)
end = self._search(max)
if end >= 0:
if not excludemax:
end += 1
else:
end = -end - 1
return start, end
def keys(self, *args, **kw):
start, end = self._range(*args, **kw)
return self._keys[start:end]
def iterkeys(self, *args, **kw):
if not (args or kw):
return iter(self._keys)
keys = self._keys
return (keys[i] for i in xrange(*self._range(*args, **kw)))
def __iter__(self):
return iter(self._keys)
def __contains__(self, key):
return (self._search(self._to_key(key)) >= 0)
has_key = __contains__
def _repr_helper(self, items):
type_self = type(self)
mod = type_self.__module__
name = type_self.__name__
name = name[:-2] if name.endswith("Py") else name
return "%s.%s(%r)" % (mod, name, items)
class _SetIteration(object):
__slots__ = ('to_iterate',
'useValues',
'_iter',
'active',
'position',
'key',
'value',
)
def __init__(self, to_iterate, useValues=False, default=None):
if to_iterate is None:
to_iterate = ()
self.to_iterate = to_iterate
if useValues:
try:
itmeth = to_iterate.iteritems
except AttributeError:
if PY3 and isinstance(to_iterate, dict): #pragma no cover Py3k
itmeth = to_iterate.items().__iter__
else:
itmeth = to_iterate.__iter__
useValues = False
else:
self.value = None
else:
itmeth = to_iterate.__iter__
self.useValues = useValues
self._iter = itmeth()
self.active = True
self.position = 0
self.key = _marker
self.value = default
self.advance()
def advance(self):
try:
if self.useValues:
self.key, self.value = next(self._iter)
else:
self.key = next(self._iter)
self.position += 1
except StopIteration:
self.active = False
self.position = -1
return self
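# Editorial illustration (not part of the original module): a small sketch of
# the _SetIteration cursor defined above, which exposes .key/.value/.active
# and is the building block for the merge and set-operation code below.
def _example_set_iteration():
    cursor = _SetIteration({'a': 1}, useValues=True)
    seen = []
    while cursor.active:
        seen.append((cursor.key, cursor.value))
        cursor.advance()
    return seen               # [('a', 1)]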
_object_lt = getattr(object, '__lt__', _marker)
def _no_default_comparison(key):
# Enforce test that key has non-default comparison.
if key is None:
return
if type(key) is object:
raise TypeError("Can't use object() as keys")
lt = getattr(key, '__lt__', None)
if lt is not None:
# CPython 3.x follows PEP 252, defining '__objclass__'
if getattr(lt, '__objclass__', None) is object:
lt = None # pragma: no cover Py3k
# PyPy3 doesn't follow PEP 252, but defines '__func__'
elif getattr(lt, '__func__', None) is _object_lt:
lt = None # pragma: no cover PyPy3
if (lt is None and
getattr(key, '__cmp__', None) is None):
raise TypeError("Object has default comparison")
class Bucket(_BucketBase):
__slots__ = ()
_value_type = list
_to_value = lambda self, x: x
VALUE_SAME_CHECK = False
def setdefault(self, key, value):
key, value = self._to_key(key), self._to_value(value)
status, value = self._set(key, value, True)
return value
def pop(self, key, default=_marker):
try:
status, value = self._del(self._to_key(key))
except KeyError:
if default is _marker:
raise
return default
else:
return value
def update(self, items):
if hasattr(items, 'iteritems'):
items = items.iteritems()
elif hasattr(items, 'items'):
items = items.items()
_si = self.__setitem__
try:
for key, value in items:
_si(key, value)
except ValueError:
raise TypeError('items must be a sequence of 2-tuples')
def __setitem__(self, key, value):
_no_default_comparison(key)
self._set(self._to_key(key), self._to_value(value))
def __delitem__(self, key):
self._del(self._to_key(key))
def clear(self):
_BucketBase.clear(self)
self._values = self._value_type()
def get(self, key, default=None):
index = self._search(self._to_key(key))
if index < 0:
return default
return self._values[index]
def __getitem__(self, key):
index = self._search(self._to_key(key))
if index < 0:
raise KeyError(key)
return self._values[index]
def _set(self, key, value, ifunset=False):
"""Set a value
Return: status, value
Status is:
None if no change
0 if change, but not size change
1 if change and size change
"""
index = self._search(key)
if index >= 0:
if (ifunset or
self.VALUE_SAME_CHECK and value == self._values[index]
):
return None, self._values[index]
self._p_changed = True
self._values[index] = value
return 0, value
else:
self._p_changed = True
index = -index - 1
self._keys.insert(index, key)
self._values.insert(index, value)
return 1, value
def _del(self, key):
index = self._search(key)
if index >= 0:
self._p_changed = True
del self._keys[index]
return 0, self._values.pop(index)
raise KeyError(key)
def _split(self, index=-1):
if index < 0 or index >= len(self._keys):
index = len(self._keys) // 2
new_instance = type(self)()
new_instance._keys = self._keys[index:]
new_instance._values = self._values[index:]
del self._keys[index:]
del self._values[index:]
new_instance._next = self._next
self._next = new_instance
return new_instance
def values(self, *args, **kw):
start, end = self._range(*args, **kw)
return self._values[start:end]
def itervalues(self, *args, **kw):
values = self._values
return (values[i] for i in xrange(*self._range(*args, **kw)))
def items(self, *args, **kw):
keys = self._keys
values = self._values
return [(keys[i], values[i])
for i in xrange(*self._range(*args, **kw))]
def iteritems(self, *args, **kw):
keys = self._keys
values = self._values
return ((keys[i], values[i])
for i in xrange(*self._range(*args, **kw)))
def __getstate__(self):
keys = self._keys
values = self._values
data = []
for i in range(len(keys)):
data.append(keys[i])
data.append(values[i])
data = tuple(data)
if self._next is not None:
return data, self._next
return (data, )
def __setstate__(self, state):
if not isinstance(state[0], tuple):
raise TypeError("tuple required for first state element")
self.clear()
if len(state) == 2:
state, self._next = state
else:
self._next = None
state = state[0]
keys = self._keys
values = self._values
for i in range(0, len(state), 2):
keys.append(state[i])
values.append(state[i+1])
def _p_resolveConflict(self, s_old, s_com, s_new):
b_old = type(self)()
if s_old is not None:
b_old.__setstate__(s_old)
b_com = type(self)()
if s_com is not None:
b_com.__setstate__(s_com)
b_new = type(self)()
if s_new is not None:
b_new.__setstate__(s_new)
if (b_com._next != b_old._next or
b_new._next != b_old._next):
raise BTreesConflictError(-1, -1, -1, 0)
if not b_com or not b_new:
raise BTreesConflictError(-1, -1, -1, 12)
i_old = _SetIteration(b_old, True)
i_com = _SetIteration(b_com, True)
i_new = _SetIteration(b_new, True)
def merge_error(reason):
return BTreesConflictError(
i_old.position, i_com.position, i_new.position, reason)
result = type(self)()
def merge_output(it):
result._keys.append(it.key)
result._values.append(it.value)
it.advance()
while i_old.active and i_com.active and i_new.active:
cmpOC = compare(i_old.key, i_com.key)
cmpON = compare(i_old.key, i_new.key)
if cmpOC == 0:
if cmpON == 0:
if i_com.value == i_old.value:
result[i_old.key] = i_new.value
elif i_new.value == i_old.value:
result[i_old.key] = i_com.value
else:
raise merge_error(1)
i_old.advance()
i_com.advance()
i_new.advance()
elif (cmpON > 0): # insert in new
merge_output(i_new)
elif i_old.value == i_com.value: # deleted new
if i_new.position == 1:
# Deleted the first item. This will modify the
# parent node, so we don't know if merging will be
# safe
raise merge_error(13)
i_old.advance()
i_com.advance()
else:
raise merge_error(2)
elif cmpON == 0:
if cmpOC > 0: # insert committed
merge_output(i_com)
elif i_old.value == i_new.value: # delete committed
if i_com.position == 1:
# Deleted the first item. This will modify the
# parent node, so we don't know if merging will be
# safe
raise merge_error(13)
i_old.advance()
i_new.advance()
else:
raise merge_error(3)
else: # both keys changed
cmpCN = compare(i_com.key, i_new.key)
if cmpCN == 0: # dueling insert
raise merge_error(4)
if cmpOC > 0: # insert committed
if cmpCN > 0: # insert i_new first
merge_output(i_new)
else:
merge_output(i_com)
elif cmpON > 0: # insert i_new
merge_output(i_new)
else:
raise merge_error(5) # both deleted same key
while i_com.active and i_new.active: # new inserts
cmpCN = compare(i_com.key, i_new.key)
if cmpCN == 0:
raise merge_error(6) # dueling insert
if cmpCN > 0: # insert new
merge_output(i_new)
else: # insert committed
merge_output(i_com)
while i_old.active and i_com.active: # new deletes rest of original
cmpOC = compare(i_old.key, i_com.key)
if cmpOC > 0: # insert committed
merge_output(i_com)
elif cmpOC == 0 and (i_old.value == i_com.value): # del in new
i_old.advance()
i_com.advance()
else: # dueling deletes or delete and change
raise merge_error(7)
while i_old.active and i_new.active:
# committed deletes rest of original
cmpON = compare(i_old.key, i_new.key)
if cmpON > 0: # insert new
merge_output(i_new)
elif cmpON == 0 and (i_old.value == i_new.value):
# deleted in committed
i_old.advance()
i_new.advance()
else: # dueling deletes or delete and change
raise merge_error(8)
if i_old.active: # dueling deletes
raise merge_error(9)
while i_com.active:
merge_output(i_com)
while i_new.active:
merge_output(i_new)
if len(result._keys) == 0: #pragma: no cover
# If the output bucket is empty, conflict resolution doesn't have
# enough info to unlink it from its containing BTree correctly.
#
# XXX TS, 2012-11-16: I don't think this is possible
#
raise merge_error(10)
result._next = b_old._next
return result.__getstate__()
def __repr__(self):
return self._repr_helper(self.items())
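# Editorial illustration (not part of the original module): a hedged sketch of
# the three-way merge implemented by Bucket._p_resolveConflict above, assuming
# BTrees.OOBTree is importable. Starting from the same state, one transaction
# adds 'b' and another adds 'c'; the merged state keeps both.
def _example_bucket_conflict_resolution():
    from BTrees.OOBTree import OOBucketPy
    old = OOBucketPy({'a': 1})
    committed = OOBucketPy({'a': 1, 'b': 2})
    new = OOBucketPy({'a': 1, 'c': 3})
    merged = OOBucketPy()
    merged.__setstate__(
        old._p_resolveConflict(old.__getstate__(),
                               committed.__getstate__(),
                               new.__getstate__()))
    return dict(merged.items())               # {'a': 1, 'b': 2, 'c': 3}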
class Set(_BucketBase):
__slots__ = ()
def add(self, key):
return self._set(self._to_key(key))[0]
insert = add
def remove(self, key):
self._del(self._to_key(key))
def update(self, items):
add = self.add
for i in items:
add(i)
def __getstate__(self):
data = tuple(self._keys)
if self._next is not None:
return data, self._next
return (data, )
def __setstate__(self, state):
if not isinstance(state[0], tuple):
raise TypeError('tuple required for first state element')
self.clear()
if len(state) == 2:
state, self._next = state
else:
self._next = None
state = state[0]
self._keys.extend(state)
def _set(self, key, value=None, ifunset=False):
index = self._search(key)
if index < 0:
index = -index - 1
self._p_changed = True
self._keys.insert(index, key)
return True, None
return False, None
def _del(self, key):
index = self._search(key)
if index >= 0:
self._p_changed = True
del self._keys[index]
return 0, 0
raise KeyError(key)
def __getitem__(self, i):
return self._keys[i]
def _split(self, index=-1):
if index < 0 or index >= len(self._keys):
index = len(self._keys) // 2
new_instance = type(self)()
new_instance._keys = self._keys[index:]
del self._keys[index:]
new_instance._next = self._next
self._next = new_instance
return new_instance
def _p_resolveConflict(self, s_old, s_com, s_new):
b_old = type(self)()
if s_old is not None:
b_old.__setstate__(s_old)
b_com = type(self)()
if s_com is not None:
b_com.__setstate__(s_com)
b_new = type(self)()
if s_new is not None:
b_new.__setstate__(s_new)
if (b_com._next != b_old._next or
b_new._next != b_old._next): # conflict: com or new changed _next
raise BTreesConflictError(-1, -1, -1, 0)
if not b_com or not b_new: # conflict: com or new empty
raise BTreesConflictError(-1, -1, -1, 12)
i_old = _SetIteration(b_old, True)
i_com = _SetIteration(b_com, True)
i_new = _SetIteration(b_new, True)
def merge_error(reason):
return BTreesConflictError(
i_old.position, i_com.position, i_new.position, reason)
result = type(self)()
def merge_output(it):
result._keys.append(it.key)
it.advance()
while i_old.active and i_com.active and i_new.active:
cmpOC = compare(i_old.key, i_com.key)
cmpON = compare(i_old.key, i_new.key)
if cmpOC == 0:
if cmpON == 0: # all match
merge_output(i_old)
i_com.advance()
i_new.advance()
elif cmpON > 0: # insert in new
merge_output(i_new)
else: # deleted new
if i_new.position == 1:
# Deleted the first item. This will modify the
# parent node, so we don't know if merging will be
# safe
raise merge_error(13)
i_old.advance()
i_com.advance()
elif cmpON == 0:
if cmpOC > 0: # insert committed
merge_output(i_com)
else: # delete committed
if i_com.position == 1:
# Deleted the first item. This will modify the
# parent node, so we don't know if merging will be
# safe
raise merge_error(13)
i_old.advance()
i_new.advance()
else: # both com and new keys changed
cmpCN = compare(i_com.key, i_new.key)
if cmpCN == 0: # both inserted same key
raise merge_error(4)
if cmpOC > 0: # insert committed
if cmpCN > 0: # insert i_new first
merge_output(i_new)
else:
merge_output(i_com)
elif cmpON > 0: # insert i_new
merge_output(i_new)
else: # both com and new deleted same key
raise merge_error(5)
while i_com.active and i_new.active: # new inserts
cmpCN = compare(i_com.key, i_new.key)
if cmpCN == 0: # dueling insert
raise merge_error(6)
if cmpCN > 0: # insert new
merge_output(i_new)
else: # insert committed
merge_output(i_com)
while i_old.active and i_com.active: # new deletes rest of original
cmpOC = compare(i_old.key, i_com.key)
if cmpOC > 0: # insert committed
merge_output(i_com)
elif cmpOC == 0: # del in new
i_old.advance()
i_com.advance()
else: # dueling deletes or delete and change
raise merge_error(7)
while i_old.active and i_new.active:
# committed deletes rest of original
cmpON = compare(i_old.key, i_new.key)
if cmpON > 0: # insert new
merge_output(i_new)
elif cmpON == 0: # deleted in committed
i_old.advance()
i_new.advance()
else: # dueling deletes or delete and change
raise merge_error(8)
if i_old.active: # dueling deletes
raise merge_error(9)
while i_com.active:
merge_output(i_com)
while i_new.active:
merge_output(i_new)
if len(result._keys) == 0: #pragma: no cover
# If the output bucket is empty, conflict resolution doesn't have
# enough info to unlink it from its containing BTree correctly.
#
# XXX TS, 2012-11-16: I don't think this is possible
#
raise merge_error(10)
result._next = b_old._next
return result.__getstate__()
def __repr__(self):
return self._repr_helper(self._keys)
class _TreeItem(object):
__slots__ = ('key',
'child',
)
def __init__(self, key, child):
self.key = key
self.child = child
class _Tree(_Base):
__slots__ = ('_data',
'_firstbucket',
)
def __new__(cls, *args):
value = _Base.__new__(cls, *args)
# Empty trees don't get their __setstate__ called upon
# unpickling (or __init__, obviously), so clear() is never called
# and _data and _firstbucket are never defined, unless we do it here.
value._data = []
value._firstbucket = None
return value
def setdefault(self, key, value):
return self._set(self._to_key(key), self._to_value(value), True)[1]
def pop(self, key, default=_marker):
try:
return self._del(self._to_key(key))[1]
except KeyError:
if default is _marker:
raise
return default
def update(self, items):
if hasattr(items, 'iteritems'):
items = items.iteritems()
elif hasattr(items, 'items'):
items = items.items()
set = self.__setitem__
for i in items:
set(*i)
def __setitem__(self, key, value):
_no_default_comparison(key)
self._set(self._to_key(key), self._to_value(value))
def __delitem__(self, key):
self._del(self._to_key(key))
def clear(self):
if self._data:
# In the case of __init__, this was already set by __new__
self._data = []
self._firstbucket = None
def __nonzero__(self):
return bool(self._data)
__bool__ = __nonzero__ #Py3k rename
def __len__(self):
l = 0
bucket = self._firstbucket
while bucket is not None:
l += len(bucket._keys)
bucket = bucket._next
return l
@property
def size(self):
return len(self._data)
def _search(self, key):
data = self._data
if data:
lo = 0
hi = len(data)
i = hi // 2
while i > lo:
cmp_ = compare(data[i].key, key)
if cmp_ < 0:
lo = i
elif cmp_ > 0:
hi = i
else:
break
i = (lo + hi) // 2
return i
return -1
def _findbucket(self, key):
index = self._search(key)
if index >= 0:
child = self._data[index].child
if isinstance(child, self._bucket_type):
return child
return child._findbucket(key)
def __contains__(self, key):
try:
tree_key = self._to_key(key)
except TypeError:
# Can't convert the key, so can't possibly be in the tree
return False
return key in (self._findbucket(tree_key) or ())
def has_key(self, key):
index = self._search(key)
if index < 0:
return False
r = self._data[index].child.has_key(key)
return r and r + 1
def keys(self, min=_marker, max=_marker,
excludemin=False, excludemax=False,
itertype='iterkeys'):
if not self._data:
return ()
if min is not _marker and min is not None:
min = self._to_key(min)
bucket = self._findbucket(min)
else:
bucket = self._firstbucket
iterargs = min, max, excludemin, excludemax
return _TreeItems(bucket, itertype, iterargs)
def iterkeys(self, min=_marker, max=_marker,
excludemin=False, excludemax=False):
return iter(self.keys(min, max, excludemin, excludemax))
def __iter__(self):
return iter(self.keys())
def minKey(self, min=_marker):
if min is _marker or min is None:
bucket = self._firstbucket
else:
min = self._to_key(min)
bucket = self._findbucket(min)
if bucket is not None:
return bucket.minKey(min)
raise ValueError('empty tree')
def maxKey(self, max=_marker):
data = self._data
if not data:
raise ValueError('empty tree')
if max is _marker or max is None:
return data[-1].child.maxKey()
max = self._to_key(max)
index = self._search(max)
if index and compare(data[index].child.minKey(), max) > 0:
index -= 1 #pragma: no cover no idea how to provoke this
return data[index].child.maxKey(max)
def _set(self, key, value=None, ifunset=False):
if (self._p_jar is not None and
self._p_oid is not None and
self._p_serial is not None):
self._p_jar.readCurrent(self)
data = self._data
if data:
index = self._search(key)
child = data[index].child
else:
index = 0
child = self._bucket_type()
self._firstbucket = child
data.append(_TreeItem(None, child))
result = child._set(key, value, ifunset)
grew = result[0]
if grew:
if type(child) is type(self):
max_size = self.max_internal_size
else:
max_size = self.max_leaf_size
if child.size > max_size:
self._grow(child, index)
# If a BTree contains only a single bucket, BTree.__getstate__()
# includes the bucket's entire state, and the bucket doesn't get
# an oid of its own. So if we have a single oid-less bucket that
# changed, it's *our* oid that should be marked as changed -- the
# bucket doesn't have one.
if (grew is not None and
type(child) is self._bucket_type and
len(data) == 1 and
child._p_oid is None):
self._p_changed = 1
return result
def _grow(self, child, index):
self._p_changed = True
new_child = child._split()
self._data.insert(index+1, _TreeItem(new_child.minKey(), new_child))
if len(self._data) >= self.max_internal_size * 2:
self._split_root()
def _split_root(self):
child = type(self)()
child._data = self._data
child._firstbucket = self._firstbucket
self._data = [_TreeItem(None, child)]
self._grow(child, 0)
def _split(self, index=None):
data = self._data
if index is None:
index = len(data) // 2
next = type(self)()
next._data = data[index:]
first = data[index]
del data[index:]
if len(data) == 0:
self._firstbucket = None # lost our bucket, can't buy no beer
if isinstance(first.child, type(self)):
next._firstbucket = first.child._firstbucket
else:
next._firstbucket = first.child;
return next
def _del(self, key):
if (self._p_jar is not None and
self._p_oid is not None and
self._p_serial is not None):
self._p_jar.readCurrent(self)
data = self._data
if not data:
raise KeyError(key)
index = self._search(key)
child = data[index].child
removed_first_bucket, value = child._del(key)
# See comment in _set about small trees
if (len(data) == 1 and
type(child) is self._bucket_type and
child._p_oid is None):
self._p_changed = True
# fix up the node key, but not for the 0'th one.
if index > 0 and child.size and compare(key, data[index].key) == 0:
self._p_changed = True
data[index].key = child.minKey()
if removed_first_bucket:
if index:
data[index-1].child._deleteNextBucket()
removed_first_bucket = False # clear flag
else:
self._firstbucket = child._firstbucket
if not child.size:
if type(child) is self._bucket_type:
if index:
data[index-1].child._deleteNextBucket()
else:
self._firstbucket = child._next
removed_first_bucket = True
del data[index]
return removed_first_bucket, value
def _deleteNextBucket(self):
self._data[-1].child._deleteNextBucket()
def __getstate__(self):
data = self._data
if not data:
# Note: returning None here causes our __setstate__
# to not be called on unpickling
return None
if (len(data) == 1 and
type(data[0].child) is not type(self) and
data[0].child._p_oid is None
):
return ((data[0].child.__getstate__(), ), )
sdata = []
for item in data:
if sdata:
sdata.append(item.key)
sdata.append(item.child)
else:
sdata.append(item.child)
return tuple(sdata), self._firstbucket
def __setstate__(self, state):
if state and not isinstance(state[0], tuple):
raise TypeError('tuple required for first state element')
self.clear()
if state is None:
return
if len(state) == 1:
bucket = self._bucket_type()
bucket.__setstate__(state[0][0])
state = [bucket], bucket
data, self._firstbucket = state
data = list(reversed(data))
self._data.append(_TreeItem(None, data.pop()))
while data:
key = data.pop()
child = data.pop()
self._data.append(_TreeItem(key, child))
def _assert(self, condition, message):
if not condition:
raise AssertionError(message)
def _check(self, nextbucket=None):
data = self._data
assert_ = self._assert
if not data:
assert_(self._firstbucket is None,
"Empty BTree has non-NULL firstbucket")
return
assert_(self._firstbucket is not None,
"Non-empty BTree has NULL firstbucket")
child_class = type(data[0].child)
for i in data:
assert_(i.child is not None, "BTree has NULL child")
assert_(type(i.child) is child_class,
"BTree children have different types")
assert_(i.child.size, "Bucket length < 1")
if child_class is type(self):
assert_(self._firstbucket is data[0].child._firstbucket,
"BTree has firstbucket different than "
"its first child's firstbucket")
for i in range(len(data)-1):
data[i].child._check(data[i+1].child._firstbucket)
data[-1].child._check(nextbucket)
elif child_class is self._bucket_type:
assert_(self._firstbucket is data[0].child,
"Bottom-level BTree node has inconsistent firstbucket "
"belief")
for i in range(len(data)-1):
assert_(data[i].child._next is data[i+1].child,
"Bucket next pointer is damaged")
assert_(data[-1].child._next is nextbucket,
"Bucket next pointer is damaged")
else:
assert_(False, "Incorrect child type")
def _p_resolveConflict(self, old, com, new):
s_old = _get_simple_btree_bucket_state(old)
s_com = _get_simple_btree_bucket_state(com)
s_new = _get_simple_btree_bucket_state(new)
return ((
self._bucket_type()._p_resolveConflict(s_old, s_com, s_new), ), )
def __repr__(self):
r = super(_Tree, self).__repr__()
r = r.replace('Py', '')
return r
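# Editorial illustration (not part of the original module): a hedged sketch of
# the "small tree" special case described in _set() and __getstate__() above,
# assuming BTrees.OOBTree is importable. While a tree holds a single oid-less
# bucket its pickled state embeds the bucket state directly (a 1-tuple); once
# a bucket split occurs the state takes the general (children, firstbucket)
# 2-tuple form.
def _example_small_tree_state():
    from BTrees.OOBTree import OOBTreePy
    tree = OOBTreePy()
    tree['key-0'] = 0
    small = tree.__getstate__()               # ((('key-0', 0),),)
    for i in range(tree.max_leaf_size + 1):
        tree['key-%04d' % i] = i              # enough keys to force a split
    grown = tree.__getstate__()               # (children, firstbucket)
    return len(small), len(grown)             # (1, 2)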
def _get_simple_btree_bucket_state(state):
if state is None:
return state
if not isinstance(state, tuple):
raise TypeError("_p_resolveConflict: expected tuple or None for state")
if len(state) == 2: # non-degenerate BTree, can't resolve
raise BTreesConflictError(-1, -1, -1, 11)
# Peel away wrapper to get to only-bucket state.
if len(state) != 1:
raise TypeError("_p_resolveConflict: expected 1- or 2-tuple for state")
state = state[0]
if not isinstance(state, tuple) or len(state) != 1:
raise TypeError("_p_resolveConflict: expected 1-tuple containing "
"bucket state")
state = state[0]
if not isinstance(state, tuple):
raise TypeError("_p_resolveConflict: expected tuple for bucket state")
return state
class _TreeItems(object):
__slots__ = ('firstbucket',
'itertype',
'iterargs',
'index',
'it',
'v',
'_len',
)
def __init__(self, firstbucket, itertype, iterargs):
self.firstbucket = firstbucket
self.itertype = itertype
self.iterargs = iterargs
self.index = -1
self.it = iter(self)
self.v = None
self._len = None
def __getitem__(self, i):
if isinstance(i, slice):
return list(self)[i]
if i < 0:
i = len(self) + i
if i < 0:
raise IndexError(i)
if i < self.index:
self.index = -1
self.it = iter(self)
while i > self.index:
try:
self.v = next(self.it)
except StopIteration:
raise IndexError(i)
else:
self.index += 1
return self.v
def __len__(self):
if self._len is None:
i = 0
for _ in self:
i += 1
self._len = i
return self._len
def __iter__(self):
bucket = self.firstbucket
itertype = self.itertype
iterargs = self.iterargs
done = 0
# Note that we don't mind if the first bucket yields no
# results due to an idiosyncrasy in how range searches are done.
while bucket is not None:
for k in getattr(bucket, itertype)(*iterargs):
yield k
done = 0
if done:
return
bucket = bucket._next
done = 1
class _TreeIterator(object):
""" Faux implementation for BBB only.
"""
def __init__(self, items): #pragma: no cover
raise TypeError(
"TreeIterators are private implementation details "
"of the C-based BTrees.\n\n"
"Please use 'iter(tree)', rather than instantiating "
"one directly."
)
class Tree(_Tree):
__slots__ = ()
def get(self, key, default=None):
bucket = self._findbucket(key)
if bucket:
return bucket.get(key, default)
return default
def __getitem__(self, key):
bucket = self._findbucket(key)
if bucket:
return bucket[key]
raise KeyError(key)
def values(self, min=_marker, max=_marker,
excludemin=False, excludemax=False):
return self.keys(min, max, excludemin, excludemax, 'itervalues')
def itervalues(self, min=_marker, max=_marker,
excludemin=False, excludemax=False):
return iter(self.values(min, max, excludemin, excludemax))
def items(self, min=_marker, max=_marker,
excludemin=False, excludemax=False):
return self.keys(min, max, excludemin, excludemax, 'iteritems')
def iteritems(self, min=_marker, max=_marker,
excludemin=False, excludemax=False):
return iter(self.items(min, max, excludemin, excludemax))
def byValue(self, min):
return reversed(
sorted((v, k) for (k, v) in self.iteritems() if v >= min))
def insert(self, key, value):
return bool(self._set(key, value, True)[0])
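# Editorial illustration (not part of the original module): a hedged sketch of
# the range-search API exposed by Tree.keys/values/items above, using the
# pure-Python OOBTreePy built from these classes (assuming BTrees.OOBTree is
# importable). Bounds are inclusive unless the exclude flags are set, and
# results come back in key order.
def _example_range_search():
    from BTrees.OOBTree import OOBTreePy
    tree = OOBTreePy({'a': 1, 'b': 2, 'c': 3, 'd': 4})
    return (list(tree.keys(min='b', max='d', excludemax=True)),   # ['b', 'c']
            list(tree.items('b', 'c')))                           # [('b', 2), ('c', 3)]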
class TreeSet(_Tree):
__slots__ = ()
def add(self, key):
return self._set(self._to_key(key))[0]
insert = add
def remove(self, key):
self._del(self._to_key(key))
def update(self, items):
add = self.add
for i in items:
add(i)
_p_resolveConflict = _Tree._p_resolveConflict
class set_operation(object):
__slots__ = ('func',
'set_type',
)
def __init__(self, func, set_type):
self.func = func
self.set_type = set_type
def __call__(self, *a, **k):
return self.func(self.set_type, *a, **k)
def difference(set_type, o1, o2):
if o1 is None or o2 is None:
return o1
i1 = _SetIteration(o1, True, 0)
i2 = _SetIteration(o2, False, 0)
if i1.useValues:
result = o1._mapping_type()
def copy(i):
result._keys.append(i.key)
result._values.append(i.value)
else:
result = o1._set_type()
def copy(i):
result._keys.append(i.key)
while i1.active and i2.active:
cmp_ = compare(i1.key, i2.key)
if cmp_ < 0:
copy(i1)
i1.advance()
elif cmp_ == 0:
i1.advance()
i2.advance()
else:
i2.advance()
while i1.active:
copy(i1)
i1.advance()
return result
def union(set_type, o1, o2):
if o1 is None:
return o2
if o2 is None:
return o1
i1 = _SetIteration(o1, False, 0)
i2 = _SetIteration(o2, False, 0)
result = o1._set_type()
def copy(i):
result._keys.append(i.key)
while i1.active and i2.active:
cmp_ = compare(i1.key, i2.key)
if cmp_ < 0:
copy(i1)
i1.advance()
elif cmp_ == 0:
copy(i1)
i1.advance()
i2.advance()
else:
copy(i2)
i2.advance()
while i1.active:
copy(i1)
i1.advance()
while i2.active:
copy(i2)
i2.advance()
return result
def intersection(set_type, o1, o2):
if o1 is None:
return o2
if o2 is None:
return o1
i1 = _SetIteration(o1, False, 0)
i2 = _SetIteration(o2, False, 0)
result = o1._set_type()
def copy(i):
result._keys.append(i.key)
while i1.active and i2.active:
cmp_ = compare(i1.key, i2.key)
if cmp_ < 0:
i1.advance()
elif cmp_ == 0:
copy(i1)
i1.advance()
i2.advance()
else:
i2.advance()
return result
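# Editorial illustration (not part of the original module): a hedged sketch of
# the merge-based set operations above, exercised through the public wrappers
# that BTrees.OOBTree builds with set_operation (assuming that module is
# importable). Inputs and outputs are key-ordered sets.
def _example_set_operations():
    from BTrees.OOBTree import OOSet, difference, intersection, union
    a = OOSet(['a', 'b', 'c'])
    b = OOSet(['b', 'c', 'd'])
    return (list(union(a, b)),           # ['a', 'b', 'c', 'd']
            list(intersection(a, b)),    # ['b', 'c']
            list(difference(a, b)))      # ['a']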
def _prepMergeIterators(o1, o2):
MERGE_DEFAULT = getattr(o1, 'MERGE_DEFAULT', None)
if MERGE_DEFAULT is None:
raise TypeError("invalid set operation")
i1 = _SetIteration(o1, True, MERGE_DEFAULT)
i2 = _SetIteration(o2, True, MERGE_DEFAULT)
return i1, i2
def weightedUnion(set_type, o1, o2, w1=1, w2=1):
if o1 is None:
if o2 is None:
return 0, None
return w2, o2
if o2 is None:
return w1, o1
i1, i2 = _prepMergeIterators(o1, o2)
MERGE = getattr(o1, 'MERGE', None)
if MERGE is None and i1.useValues and i2.useValues:
raise TypeError("invalid set operation")
MERGE_WEIGHT = getattr(o1, 'MERGE_WEIGHT')
if (not i1.useValues) and i2.useValues:
i1, i2 = i2, i1
w1, w2 = w2, w1
_merging = i1.useValues or i2.useValues
if _merging:
result = o1._mapping_type()
def copy(i, w):
result._keys.append(i.key)
result._values.append(MERGE_WEIGHT(i.value, w))
else:
result = o1._set_type()
def copy(i, w):
result._keys.append(i.key)
while i1.active and i2.active:
cmp_ = compare(i1.key, i2.key)
if cmp_ < 0:
copy(i1, w1)
i1.advance()
elif cmp_ == 0:
result._keys.append(i1.key)
if _merging:
result._values.append(MERGE(i1.value, w1, i2.value, w2))
i1.advance()
i2.advance()
else:
copy(i2, w2)
i2.advance()
while i1.active:
copy(i1, w1)
i1.advance()
while i2.active:
copy(i2, w2)
i2.advance()
return 1, result
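# Editorial illustration (not part of the original module): a hedged sketch of
# weightedUnion via the public wrapper in BTrees.IIBTree (assuming that module
# is importable). Each side's values are scaled by its weight through
# MERGE_WEIGHT, and colliding keys are combined through MERGE.
def _example_weighted_union():
    from BTrees.IIBTree import IIBucket, weightedUnion
    a = IIBucket({1: 10, 2: 20})
    b = IIBucket({2: 1, 3: 5})
    weight, result = weightedUnion(a, b, 2, 3)
    return weight, dict(result.items())  # (1, {1: 20, 2: 43, 3: 15})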
def weightedIntersection(set_type, o1, o2, w1=1, w2=1):
if o1 is None:
if o2 is None:
return 0, None
return w2, o2
if o2 is None:
return w1, o1
i1, i2 = _prepMergeIterators(o1, o2)
MERGE = getattr(o1, 'MERGE', None)
if MERGE is None and i1.useValues and i2.useValues:
raise TypeError("invalid set operation")
if (not i1.useValues) and i2.useValues:
i1, i2 = i2, i1
w1, w2 = w2, w1
_merging = i1.useValues or i2.useValues
if _merging:
result = o1._mapping_type()
else:
result = o1._set_type()
while i1.active and i2.active:
cmp_ = compare(i1.key, i2.key)
if cmp_ < 0:
i1.advance()
elif cmp_ == 0:
result._keys.append(i1.key)
if _merging:
result._values.append(MERGE(i1.value, w1, i2.value, w2))
i1.advance()
i2.advance()
else:
i2.advance()
if isinstance(result, (Set, TreeSet)):
return w1 + w2, result
return 1, result
def multiunion(set_type, seqs):
# XXX simple/slow implementation. Goal is just to get tests to pass.
result = set_type()
for s in seqs:
try:
iter(s)
except TypeError:
s = set_type((s, ))
result.update(s)
return result
def to_ob(self, v):
return v
def _packer_unpacker(struct_format):
s = Struct(struct_format)
return s.pack, s.unpack
int_pack, int_unpack = _packer_unpacker('i')
def to_int(self, v):
try:
int_pack(index(v))
except (struct_error, TypeError):
raise TypeError('32-bit integer expected')
return int(v)
float_pack = _packer_unpacker('f')[0]
def to_float(self, v):
try:
float_pack(v)
except struct_error:
raise TypeError('float expected')
return float(v)
long_pack, long_unpack = _packer_unpacker('q')
def to_long(self, v):
try:
long_pack(index(v))
except (struct_error, TypeError):
if isinstance(v, int_types):
raise ValueError("Value out of range", v)
raise TypeError('64-bit integer expected')
return int(v)
def to_bytes(l):
def to(self, v):
if not (isinstance(v, bytes) and len(v) == l):
raise TypeError("%s-byte array expected" % l)
return v
return to
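# Editorial illustration (not part of the original module): a sketch of the
# coercion helpers above. to_int only accepts values that fit a signed 32-bit
# slot and to_long values that fit 64 bits; the unused self argument is passed
# as None here because these functions are normally bound as methods.
def _example_coercions():
    assert to_int(None, 2 ** 31 - 1) == 2 ** 31 - 1
    try:
        to_int(None, 2 ** 31)             # one past the 32-bit range
    except TypeError:
        pass
    assert to_long(None, 2 ** 63 - 1) == 2 ** 63 - 1
    try:
        to_long(None, 2 ** 63)            # one past the 64-bit range
    except ValueError:
        pass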
tos = dict(I=to_int, L=to_long, F=to_float, O=to_ob)
MERGE_DEFAULT_int = 1
MERGE_DEFAULT_float = 1.0
def MERGE(self, value1, weight1, value2, weight2):
return (value1 * weight1) + (value2 * weight2)
def MERGE_WEIGHT_default(self, value, weight):
return value
def MERGE_WEIGHT_numeric(self, value, weight):
return value * weight
def _fix_pickle(mod_dict, mod_name):
# Make the pure-Python objects pickle with the same
# class names and types as the C extensions by setting the appropriate
# _BTree_reduce_as attribute.
# If the C extensions are not available, we also change the
# __name__ attribute of the type to match the C name (otherwise
# we wind up with *Py in the pickles)
# Each module must call this as `_fix_pickle(globals(), __name__)`
# at the bottom.
mod_prefix = mod_name.split('.')[-1][:2] # BTrees.OOBTree -> 'OO'
bucket_name = mod_prefix + 'Bucket'
py_bucket_name = bucket_name + 'Py'
have_c_extensions = mod_dict[bucket_name] is not mod_dict[py_bucket_name]
for name in 'Bucket', 'Set', 'BTree', 'TreeSet', 'TreeIterator':
raw_name = mod_prefix + name
py_name = raw_name + 'Py'
try:
py_type = mod_dict[py_name]
except KeyError:
if name == 'TreeIterator':
# Optional
continue
raise # pragma: no cover
raw_type = mod_dict[raw_name] # Could be C or Python
py_type._BTree_reduce_as = raw_type
py_type._BTree_reduce_up_bound = py_type
if not have_c_extensions: # pragma: no cover
# Set FooPy to have the __name__ of simply Foo.
# We can't do this if the C extension is available,
# because then mod_dict[FooPy.__name__] is not FooPy
# and pickle refuses to save something like that.
# On the other hand (no C extension) this makes our
# Python pickle match the C version by default
py_type.__name__ = raw_name
py_type.__qualname__ = raw_name # Py 3.3+