Skip to content

Commit

Permalink
Use ffi.gc() on the ring nodes to avoid needing a weakref to Persiste…
Browse files Browse the repository at this point in the history
…nt objects.

Except on PyPy, where we can already weakref them automatically.

Fixes #133.
  • Loading branch information
jamadden committed Mar 4, 2020
1 parent 49c9ffb commit 15e9821
Show file tree
Hide file tree
Showing 12 changed files with 282 additions and 136 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
- Add support for Python 3.9a3+.
See `issue 124 <https://github.com/zopefoundation/persistent/issues/124>`_.

- Fix the Python implementation of the PickleCache to be able to store
objects that cannot be weakly referenced. See `issue 133
<https://github.com/zopefoundation/persistent/issues/133>`_.

4.5.1 (2019-11-06)
------------------
Expand Down
36 changes: 34 additions & 2 deletions persistent/_ring_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,42 @@

ffi = FFI()
with open(os.path.join(this_dir, 'ring.h')) as f:
ffi.cdef(f.read())
cdefs = f.read()

# Define a structure with the same layout as CPersistentRing,
# and an extra member. We'll cast between them to reuse the
# existing functions.
struct_def = """
typedef struct CPersistentRingCFFI_struct
{
struct CPersistentRing_struct *r_prev;
struct CPersistentRing_struct *r_next;
intptr_t pobj_id; /* The id(PersistentPy object) */
} CPersistentRingCFFI;
"""

cdefs += struct_def + """
void cffi_ring_add(CPersistentRing* ring, void* elt);
void cffi_ring_del(void* elt);
void cffi_ring_move_to_head(CPersistentRing* ring, void* elt);
"""

ffi.cdef(cdefs)

source = """
#include "ring.c"
""" + struct_def + """
/* Like the other functions, but taking the CFFI version of the struct. This
* saves casting at runtime in Python.
*/
#define cffi_ring_add(ring, elt) ring_add((CPersistentRing*)ring, (CPersistentRing*)elt)
#define cffi_ring_del(elt) ring_del((CPersistentRing*)elt)
#define cffi_ring_move_to_head(ring, elt) ring_move_to_head((CPersistentRing*)ring, (CPersistentRing*)elt)
"""

ffi.set_source('persistent._ring',
'#include "ring.c"',
source,
include_dirs=[this_dir])

if __name__ == '__main__':
Expand Down
10 changes: 5 additions & 5 deletions persistent/cPersistence.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@

struct ccobject_head_struct;

typedef struct ccobject_head_struct PerCache;
typedef struct ccobject_head_struct PerCache;

/* How big is a persistent object?
12 PyGC_Head is two pointers and an int
8 PyObject_HEAD is an int and a pointer
12 jar, oid, cache pointers
8 ring struct
8 serialno
Expand Down Expand Up @@ -67,7 +67,7 @@ typedef struct ccobject_head_struct PerCache;
unsigned long field after a signed char state field and a
3-character reserved field. This didn't work because there
are packages in the wild that have their own copies of cPersistence.h
that didn't see the update.
that didn't see the update.
To get around this, we used the reserved space by making
estimated_size a 24-bit bit field in the space occupied by the old
Expand Down Expand Up @@ -132,14 +132,14 @@ static cPersistenceCAPIstruct *cPersistenceCAPI;

#define PER_PREVENT_DEACTIVATION(O) ((O)->state==cPersistent_UPTODATE_STATE && ((O)->state=cPersistent_STICKY_STATE))

/*
/*
Make a persistent object usable from C by:
- Making sure it is not a ghost
- Making it sticky.
IMPORTANT: If you call this and don't call PER_ALLOW_DEACTIVATION,
IMPORTANT: If you call this and don't call PER_ALLOW_DEACTIVATION,
your object will not be ghostified.
PER_USE returns a 1 on success and 0 failure, where failure means
Expand Down
1 change: 1 addition & 0 deletions persistent/persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
# check in __getattribute__
_SPECIAL_NAMES = set(SPECIAL_NAMES)

# __ring is for use by PickleCachePy and is opaque to us.
_SLOTS = ('__jar', '__oid', '__serial', '__flags', '__size', '__ring',)
_SPECIAL_NAMES.update([intern('_Persistent' + x) for x in _SLOTS])

Expand Down
159 changes: 120 additions & 39 deletions persistent/picklecache.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@
#
##############################################################################
import gc
import weakref

from weakref import WeakValueDictionary

from zope.interface import implementer
from zope.interface import classImplements

from persistent._compat import use_c_impl
from persistent._compat import PYPY
from persistent.interfaces import GHOST
from persistent.interfaces import IPickleCache
from persistent.interfaces import IExtendedPickleCache
Expand Down Expand Up @@ -47,6 +48,8 @@ def _gc_monitor(o):
pass

_OGA = object.__getattribute__
_OSA = object.__setattr__


def _sweeping_ring(f):
# A decorator for functions in the PickleCache
Expand All @@ -62,6 +65,87 @@ def locked(self, *args, **kwargs):
return locked


class _WeakValueDictionary(object):
# Maps from OID -> Persistent object, but
# only weakly references the Persistent object. This is similar
# to ``weakref.WeakValueDictionary``, but is customized depending on the
# platform. On PyPy, all objects can cheaply use a WeakRef, so that's
# what we actually use. On CPython, though, ``PersistentPy`` cannot be weakly
# referenced, so we rely on the fact that the ``id()`` of an object is its
# memory location, and we use ``ctypes`` to cast that integer back to
# the object.
#
# To remove stale addresses, we rely on the ``ffi.gc()`` object with the exact
# same lifetime as the ``PersistentPy`` object. It calls us, we get the ``id``
# back out of the CData, and clean up.
if PYPY: # pragma: no cover
def __init__(self):
self._data = WeakValueDictionary()

def _from_addr(self, addr):
return addr

def _save_addr(self, oid, obj):
return obj

cleanup_hook = None
else:
def __init__(self):
self._data = {}
self._addr_to_oid = {}

def _save_addr(self, oid, obj):
i = id(obj)
self._addr_to_oid[i] = oid
return i

try:
from ctypes import cast
from ctypes import py_object
cast = staticmethod(cast)
except ImportError: # pragma: no cover
def _from_addr(self, addr):
for i in gc.get_objects():
if id(i) == addr:
return i
return None
else:
def _from_addr(self, addr):
return self.cast(addr, self.py_object).value # pylint:disable=too-many-function-args

def cleanup_hook(self, cdata):
oid = self._addr_to_oid.pop(cdata.pobj_id, None)
self._data.pop(oid, None)

def __contains__(self, oid):
return oid in self._data

def __len__(self):
return len(self._data)

def __setitem__(self, key, value):
addr = self._save_addr(key, value)
self._data[key] = addr

def pop(self, oid):
return self._from_addr(self._data.pop(oid))

def items(self):
from_addr = self._from_addr
for oid, addr in self._data.items():
yield oid, from_addr(addr)

def get(self, oid, default=None):
addr = self._data.get(oid, self)
if addr is self:
return default
return self._from_addr(addr)

def __getitem__(self, oid):
addr = self._data[oid]
return self._from_addr(addr)


@use_c_impl
# We actually implement IExtendedPickleCache, but
# the C version does not, and our interface declarations are
Expand Down Expand Up @@ -99,8 +183,8 @@ def __init__(self, jar, target_size=0, cache_size_bytes=0):
self.drain_resistance = 0
self.non_ghost_count = 0
self.persistent_classes = {}
self.data = weakref.WeakValueDictionary()
self.ring = Ring()
self.data = _WeakValueDictionary()
self.ring = Ring(self.data.cleanup_hook)
self.cache_size_bytes = cache_size_bytes

# IPickleCache API
Expand All @@ -113,8 +197,8 @@ def __len__(self):
def __getitem__(self, oid):
""" See IPickleCache.
"""
value = self.data.get(oid)
if value is not None:
value = self.data.get(oid, self)
if value is not self:
return value
return self.persistent_classes[oid]

Expand Down Expand Up @@ -145,14 +229,14 @@ def __setitem__(self, oid, value):
raise ValueError('A different object already has the same oid')
# Match the C impl: it requires a jar. Let this raise AttributeError
# if no jar is found.
jar = getattr(value, '_p_jar')
jar = value._p_jar
if jar is None:
raise ValueError("Cached object jar missing")
# It also requires that it cannot be cached more than one place
existing_cache = getattr(jar, '_cache', None)
existing_cache = getattr(jar, '_cache', None) # type: PickleCache
if (existing_cache is not None
and existing_cache is not self
and existing_cache.data.get(oid) is not None):
and existing_cache is not self
and oid in existing_cache.data):
raise ValueError("Cache values may only be in one cache.")

if isinstance(value, type): # ZODB.persistentclass.PersistentMetaClass
Expand All @@ -163,6 +247,10 @@ def __setitem__(self, oid, value):
if _OGA(value, '_p_state') != GHOST and value not in self.ring:
self.ring.add(value)
self.non_ghost_count += 1
elif self.data.cleanup_hook:
# Ensure we begin monitoring for ``value`` to
# be deallocated.
self.ring.ring_node_for(value)

def __delitem__(self, oid):
""" See IPickleCache.
Expand All @@ -172,13 +260,12 @@ def __delitem__(self, oid):
if oid in self.persistent_classes:
del self.persistent_classes[oid]
else:
value = self.data.pop(oid)
self.ring.delete(value)
pobj = self.data.pop(oid)
self.ring.delete(pobj)

def get(self, oid, default=None):
""" See IPickleCache.
"""

value = self.data.get(oid, self)
if value is not self:
return value
Expand All @@ -202,6 +289,7 @@ def mru(self, oid):
self.non_ghost_count += 1
else:
self.ring.move_to_head(value)
return None

def ringlen(self):
""" See IPickleCache.
Expand All @@ -216,10 +304,10 @@ def items(self):
def lru_items(self):
""" See IPickleCache.
"""
result = []
for obj in self.ring:
result.append((obj._p_oid, obj))
return result
return [
(obj._p_oid, obj)
for obj in self.ring
]

def klass_items(self):
""" See IPickleCache.
Expand Down Expand Up @@ -310,6 +398,7 @@ def update_object_size_estimation(self, oid, new_size):
""" See IPickleCache.
"""
value = self.data.get(oid)

if value is not None:
# Recall that while the argument is given in bytes,
# we have to work with 64-block chunks (plus one)
Expand All @@ -326,7 +415,7 @@ def update_object_size_estimation(self, oid, new_size):
lambda self, nv: setattr(self, 'drain_resistance', nv)
)
cache_non_ghost_count = property(lambda self: self.non_ghost_count)
cache_data = property(lambda self: dict(self.data.items()))
cache_data = property(lambda self: dict(self.items()))
cache_klass_count = property(lambda self: len(self.persistent_classes))

# Helpers
Expand All @@ -347,20 +436,15 @@ def update_object_size_estimation(self, oid, new_size):

@_sweeping_ring
def _sweep(self, target, target_size_bytes=0):
# To avoid mutating datastructures in place or making a copy,
# and to work efficiently with both the CFFI ring and the
# deque-based ring, we collect the objects and their indexes
# up front and then hand them off for ejection.
# We don't use enumerate because that's slow under PyPy
i = -1
to_eject = []
for value in self.ring:
if ((target or target_size_bytes)
and (not target or self.non_ghost_count <= target)
and (self.total_estimated_size <= target_size_bytes
or not target_size_bytes)):
ejected = 0
ring = self.ring
for node, value in ring.iteritems():
if ((target or target_size_bytes) # pylint:disable=too-many-boolean-expressions
and (not target or self.non_ghost_count <= target)
and (self.total_estimated_size <= target_size_bytes
or not target_size_bytes)):
break
i += 1

if value._p_state == UPTODATE:
# The C implementation will only evict things that are specifically
# in the up-to-date state
Expand All @@ -379,17 +463,14 @@ def _sweep(self, target, target_size_bytes=0):

value._p_deactivate()
if (self._persistent_deactivate_ran
# Test-cases sneak in non-Persistent objects, sigh, so naturally
# they don't cooperate (without this check a bunch of test_picklecache
# breaks)
or not isinstance(value, self._SWEEPABLE_TYPES)):
to_eject.append((i, value))
# Test-cases sneak in non-Persistent objects, sigh, so naturally
# they don't cooperate (without this check a bunch of test_picklecache
# breaks)
or not isinstance(value, self._SWEEPABLE_TYPES)):
ring.delete_node(node)
ejected += 1
self.non_ghost_count -= 1

ejected = len(to_eject)
if ejected:
self.ring.delete_all(to_eject)

return ejected

@_sweeping_ring
Expand Down
2 changes: 2 additions & 0 deletions persistent/ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ ring_add(CPersistentRing *ring, CPersistentRing *elt)
void
ring_del(CPersistentRing *elt)
{
assert(elt->r_next);
elt->r_next->r_prev = elt->r_prev;
elt->r_prev->r_next = elt->r_next;
elt->r_next = NULL;
Expand All @@ -52,6 +53,7 @@ ring_del(CPersistentRing *elt)
void
ring_move_to_head(CPersistentRing *ring, CPersistentRing *elt)
{
assert(elt->r_next);
elt->r_prev->r_next = elt->r_next;
elt->r_next->r_prev = elt->r_prev;
elt->r_next = ring;
Expand Down
Loading

0 comments on commit 15e9821

Please sign in to comment.