Skip to content

Commit

Permalink
After study, the delayed release of iterators under PyPy seems harmle…
Browse files Browse the repository at this point in the history
…ss. Move the gc calls that clear them to the test case.
  • Loading branch information
jamadden committed Apr 10, 2015
1 parent e20bcee commit 1a55a0a
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 19 deletions.
30 changes: 14 additions & 16 deletions src/ZEO/ClientStorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,6 @@
except ImportError:
ResolvedSerial = 'rs'

# ClientStorage keeps track of open iterators in a
# WeakValueDictionary. Under non-refcounted implementations,
# like PyPy, the weak references are only cleared when
# a GC runs. To make sure they are cleared when requested,
# we request a GC in that case.
# XXX: Is this needed? Do we have to dispose of them in a timely
# fashion? There are a few tests in IterationTests.py that
# directly check the length of the internal data structures, but
# if they stick around a bit longer is that visible to real clients,
# or could cause any problems (E.g., reuse of ids?)
_ITERATOR_GC_NEEDS_GC = not hasattr(sys, 'getrefcount')


def tid2time(tid):
return str(TimeStamp(tid))

Expand Down Expand Up @@ -1557,10 +1544,21 @@ def _iterator_gc(self, disconnected=False):
self._iterator_ids.clear()
return

if _ITERATOR_GC_NEEDS_GC:
gc.collect()

# Recall that self._iterators is a WeakValueDictionary. Under
# non-refcounted implementations like PyPy, this means that
# unreachable iterators (and their IDs) may still be in this
# map for some arbitrary period of time (until the next
# garbage collection occurs.) This is fine: the server
# supports being asked to GC the same iterator ID more than
# once. Iterator ids can be reused, but only after a server
# restart, after which we had already been called with
# `disconnected` True and so had cleared out our map anyway,
# plus we simply replace whatever is in the map if we get a
# duplicate id---and duplicates at that point would be dead
# objects waiting to be cleaned up. So there's never any risk
# of confusing TransactionIterator objects that are in use.
iids = self._iterator_ids - set(self._iterators)
self._iterators._last_gc = time.time() # let tests know we've been called
if iids:
try:
self._server.iterator_gc(list(iids))
Expand Down
4 changes: 4 additions & 0 deletions src/ZEO/tests/InvalidationTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ def checkConcurrentUpdates2Storages_emulated(self):
self._check_tree(cn, tree)
self._check_threads(tree, t1, t2)

transaction.abort()
cn.close()
db1.close()
db2.close()
Expand Down Expand Up @@ -374,6 +375,7 @@ def checkConcurrentUpdates2Storages(self):
self._check_tree(cn, tree)
self._check_threads(tree, t1, t2)

transaction.abort()
cn.close()
db1.close()
db2.close()
Expand Down Expand Up @@ -428,6 +430,7 @@ def checkConcurrentUpdates1Storage(self):
self._check_tree(cn, tree)
self._check_threads(tree, t1, t2)

transaction.abort()
cn.close()
db1.close()

Expand Down Expand Up @@ -462,6 +465,7 @@ def checkConcurrentUpdates2StoragesMT(self):
self._check_tree(cn, tree)
self._check_threads(tree, t1, t2, t3)

transaction.abort()
cn.close()
db1.close()
db2.close()
Expand Down
39 changes: 36 additions & 3 deletions src/ZEO/tests/IterationTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,41 @@

import transaction
import six

import gc

class IterationTests:

def _assertIteratorIdsEmpty(self):
# Account for the need to run a GC collection
# under non-refcounted implementations like PyPy
# for storage._iterator_gc to fully do its job.
# First, confirm that it ran
self.assertTrue(self._storage._iterators._last_gc > 0)
gc_enabled = gc.isenabled()
gc.disable() # make sure there's no race conditions cleaning out the weak refs
try:
self.assertEquals(0, len(self._storage._iterator_ids))
except AssertionError:
# Ok, we have ids. That should also mean that the
# weak dictionary has the same length.

self.assertEqual(len(self._storage._iterators), len(self._storage._iterator_ids))
# Now if we do a collection and re-ask for iterator_gc
# everything goes away as expected.
gc.enable()
gc.collect()
gc.collect() # sometimes PyPy needs it twice to clear weak refs

self._storage._iterator_gc()

self.assertEqual(len(self._storage._iterators), len(self._storage._iterator_ids))
self.assertEquals(0, len(self._storage._iterator_ids))
finally:
if gc_enabled:
gc.enable()
else:
gc.disable()

def checkIteratorGCProtocol(self):
# Test garbage collection on protocol level.
server = self._storage._server
Expand Down Expand Up @@ -78,8 +109,9 @@ def checkIteratorGCStorageCommitting(self):

# GC happens at the transaction boundary. After that, both the storage
# and the server have forgotten the iterator.
self._storage._iterators._last_gc = -1
self._dostore()
self.assertEquals(0, len(self._storage._iterator_ids))
self._assertIteratorIdsEmpty()
self.assertRaises(KeyError, self._storage._server.iterator_next, iid)

def checkIteratorGCStorageTPCAborting(self):
Expand All @@ -93,9 +125,10 @@ def checkIteratorGCStorageTPCAborting(self):
iid = list(self._storage._iterator_ids)[0]

t = transaction.Transaction()
self._storage._iterators._last_gc = -1
self._storage.tpc_begin(t)
self._storage.tpc_abort(t)
self.assertEquals(0, len(self._storage._iterator_ids))
self._assertIteratorIdsEmpty()
self.assertRaises(KeyError, self._storage._server.iterator_next, iid)

def checkIteratorGCStorageDisconnect(self):
Expand Down

0 comments on commit 1a55a0a

Please sign in to comment.