Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Fixed two tricky bugs with some initialization edge cases but big con…
Browse files Browse the repository at this point in the history
…sequences.

- Fixed a bug where shared collection among derived classes would not work
  consistently.

- Fixed a bug during initialization of persistent sub-objects in that changes
  would not be seen in the transaction once the datamanager was flushed.
  • Loading branch information
strichter committed Oct 6, 2013
1 parent 61c732e commit e949ab5
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 32 deletions.
6 changes: 6 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ CHANGES
0.8.5 (unreleased)
------------------

- Fixed a bug where shared collection among derived classes would not work
consistently.

- Fixed a bug during initialization of persistent sub-objects in that changes
would not be seen in the transaction once the datamanager was flushed.

- Added serializer for ``datetime.time``, since the loaded reduced state is
not usable (due to string to unicode conversion).

Expand Down
9 changes: 2 additions & 7 deletions src/mongopersist/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -391,13 +391,8 @@ When loading the addresses, they should be of the right type:
<Address Maynard (01754)>
>>> dm.root['stephan'].address2
<ExtendedAddress Tettau (01945) in Germany>


XXX: BUG with detecting derived classes properly. So for now just specify the
collection explicitly.

# >>> dm.root['stephan'].address3
# <ExtendedAddress Arnsdorf (01945) in Germany>
>>> dm.root['stephan'].address3
<ExtendedAddress Arnsdorf (01945) in Germany>


Tricky Cases
Expand Down
36 changes: 26 additions & 10 deletions src/mongopersist/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,15 @@ def get_collection_name(self, obj):
result = coll.find({'collection': coll_name,
'database': db_name})
if result.count() > 0:
setattr(obj, '_p_mongo_store_type', True)
setattr(obj.__class__, '_p_mongo_store_type', True)
map['doc_has_type'] = getattr(obj, '_p_mongo_store_type', False)
coll.save(map)
result = map
# Make sure that derived classes that share a collection know they
# have to store their type.
if (result['doc_has_type'] and
not getattr(obj, '_p_mongo_store_type', False)):
obj.__class__._p_mongo_store_type = True
AVAILABLE_NAME_MAPPINGS.add(map_hash)
return db_name, coll_name

Expand Down Expand Up @@ -158,9 +164,9 @@ def get_non_persistent_state(self, obj, seen):
state = {'_py_persistent_type': get_dotted_name(args[0])}
else:
state = {'_py_factory': get_dotted_name(factory),
'_py_factory_args': self.get_state(args, seen)}
'_py_factory_args': self.get_state(args, obj, seen)}
for name, value in obj_state.items():
state[name] = self.get_state(value, seen)
state[name] = self.get_state(value, obj, seen)
return state

def get_persistent_state(self, obj, seen):
Expand All @@ -179,7 +185,7 @@ def get_persistent_state(self, obj, seen):
# deserialization later.
return dbref

def get_state(self, obj, seen=None):
def get_state(self, obj, pobj=None, seen=None):
seen = seen or []
if isinstance(obj, interfaces.MONGO_NATIVE_TYPES):
# If we have a native type, we'll just use it as the state.
Expand Down Expand Up @@ -207,26 +213,36 @@ def get_state(self, obj, seen=None):
# need to be able to properly encode those.
return {'_py_type': 'type',
'path': get_dotted_name(obj)}

# We need to make sure that the object's jar and doc-object are
# set. This is important for the case when a sub-object was just
# added.
if getattr(obj, '_p_mongo_sub_object', False):
if obj._p_jar is None:
obj._p_jar = pobj._p_jar
obj._p_mongo_doc_object = pobj

if isinstance(obj, (tuple, list, PersistentList)):
# Make sure that all values within a list are serialized
# correctly. Also convert any sequence-type to a simple list.
return [self.get_state(value, seen) for value in obj]
return [self.get_state(value, pobj, seen) for value in obj]
if isinstance(obj, (dict, PersistentDict)):
# Same as for sequences, make sure that the contained values are
# properly serialized.
# Note: A big constraint in Mongo is that keys must be strings!
has_non_string_key = False
data = []
for key, value in obj.items():
data.append((key, self.get_state(value, seen)))
data.append((key, self.get_state(value, pobj, seen)))
has_non_string_key |= not isinstance(key, basestring)
if not has_non_string_key:
# The easy case: all keys are strings:
return dict(data)
else:
# We first need to reduce the keys and then produce a data
# structure.
data = [(self.get_state(key), value) for key, value in data]
data = [(self.get_state(key, pobj), value)
for key, value in data]
return {'dict_data': data}

if isinstance(obj, persistent.Persistent):
Expand All @@ -240,7 +256,7 @@ def get_state(self, obj, seen=None):
return self.get_non_persistent_state(obj, seen)

def get_full_state(self, obj):
doc = self.get_state(obj.__getstate__())
doc = self.get_state(obj.__getstate__(), obj)
# Add a persistent type info, if necessary.
if getattr(obj, '_p_mongo_store_type', False):
doc['_py_persistent_type'] = get_dotted_name(obj.__class__)
Expand Down Expand Up @@ -268,7 +284,7 @@ def store(self, obj, ref_only=False):
else:
# XXX: Handle newargs; see ZODB.serialize.ObjectWriter.serialize
# Go through each attribute and search for persistent references.
doc = self.get_state(obj.__getstate__())
doc = self.get_state(obj.__getstate__(), obj)

if getattr(obj, '_p_mongo_store_type', False):
doc['_py_persistent_type'] = get_dotted_name(obj.__class__)
Expand Down Expand Up @@ -315,7 +331,7 @@ class ObjectReader(object):
def __init__(self, jar):
self._jar = jar
self._single_map_cache = {}
self.preferPersistent=True
self.preferPersistent = True

def simple_resolve(self, path):
# We try to look up the klass from a cache. The important part here is
Expand Down
165 changes: 151 additions & 14 deletions src/mongopersist/tests/test_datamanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,30 @@
import transaction
from bson import dbref, objectid

from mongopersist import conflict, interfaces, testing, datamanager
from mongopersist import conflict, interfaces, serialize, testing, datamanager

class Root(persistent.Persistent):
    # Minimal persistent fixture used as the application root object in the
    # collection-sharing doctest (see `dm.root['app'] = Root()`); it carries
    # no state of its own.
    pass

class Foo(persistent.Persistent):
    """Simple persistent test fixture identified by an optional name."""

    def __init__(self, name=None):
        self.name = name

    def __repr__(self):
        # Show the concrete class so reprs of subclasses stay informative.
        cls_name = self.__class__.__name__
        return '<%s %s>' % (cls_name, self.name)

class Super(persistent.Persistent):
    """Persistent fixture pinned to an explicit Mongo collection.

    Sub-classes inherit `_p_mongo_collection` and therefore share the
    same collection, which the collection-sharing tests exercise.
    """

    _p_mongo_collection = 'Super'

    def __init__(self, name=None):
        self.name = name

    def __repr__(self):
        # Show the concrete class so reprs of subclasses stay informative.
        cls_name = self.__class__.__name__
        return '<%s %s>' % (cls_name, self.name)

class Sub(Super):
    # Inherits Super's `_p_mongo_collection = 'Super'`, so both classes are
    # stored in the same collection; this exercises the shared-collection
    # code path fixed in this commit.
    pass

class Bar(persistent.Persistent):
_p_mongo_sub_object = True

Expand Down Expand Up @@ -188,7 +206,7 @@ def doctest_MongoDataManager_object_dump_load_reset():
>>> foo._p_changed
True
>>> dm._registered_objects
[<mongopersist.tests.test_datamanager.Foo object at 0x2fe1f50>]
[<Foo Foo>]
>>> foo_ref = dm.dump(foo)
Expand Down Expand Up @@ -346,7 +364,7 @@ def doctest_MongoDataManager_flush():
The object is now registered with the data manager:
>>> dm._registered_objects
[<mongopersist.tests.test_datamanager.Foo object at 0x2f7b9b0>]
[<Foo Foo>]
>>> foo_new._p_serial
'\x00\x00\x00\x00\x00\x00\x00\x01'
Expand Down Expand Up @@ -408,22 +426,21 @@ def doctest_MongoDataManager_insert():
It is also added to the list of inserted objects:
>>> dm._inserted_objects
[<mongopersist.tests.test_datamanager.Foo object at 0x18d41b8>]
[<Foo foo>]
Let's make sure it is really in Mongo:
>>> dm.reset()
>>> foo_new = dm.load(foo_ref)
>>> foo_new
<mongopersist.tests.test_datamanager.Foo object at 0x27cade8>
<Foo foo>
Notice, that we cannot insert the object again:
>>> dm.insert(foo_new)
Traceback (most recent call last):
...
ValueError: ('Object has already an OID.',
<mongopersist.tests.test_datamanager.Foo object at 0x1fecde8>)
ValueError: ('Object has already an OID.', <Foo foo>)
Finally, registering a new object will not trigger an insert, but only
schedule the object for writing. This is done, since sometimes objects are
Expand All @@ -434,7 +451,7 @@ def doctest_MongoDataManager_insert():
>>> dm.register(foo2)
>>> dm._registered_objects
[<mongopersist.tests.test_datamanager.Foo object at 0x3087b18>]
[<Foo Foo 2>]
But storing works as expected (flush is implicit before find):
Expand Down Expand Up @@ -484,14 +501,13 @@ def doctest_MongoDataManager_remove():
Also, the object is added to the list of removed objects:
>>> dm._removed_objects
[<mongopersist.tests.test_datamanager.Foo object at 0x1693140>]
[<Foo foo>]
Note that you cannot remove objects that are not in the database:
>>> dm.remove(Foo('Foo 2'))
Traceback (most recent call last):
ValueError: ('Object does not have OID.',
<mongopersist.tests.test_datamanager.Foo object at 0x1982ed8>)
ValueError: ('Object does not have OID.', <Foo Foo 2>)
There is an edge case, if the object is inserted and removed in the same
transaction:
Expand Down Expand Up @@ -634,12 +650,12 @@ def doctest_MongoDataManager_abort():
>>> foo = dm.load(foo_ref)
>>> foo.name = '1'
>>> dm._registered_objects
[<mongopersist.tests.test_datamanager.Foo object at 0x187b1b8>]
[<Foo 1>]
>>> foo2 = dm.load(foo2_ref)
>>> dm.remove(foo2)
>>> dm._removed_objects
[<mongopersist.tests.test_datamanager.Foo object at 0x1e5c140>]
[<Foo two>]
>>> foo3_ref = dm.insert(Foo('three'))
Expand Down Expand Up @@ -876,7 +892,8 @@ def doctest_MongoDataManager_tpc_finish():
>>> foo3._p_serial
'\x00\x00\x00\x00\x00\x00\x00\x04'
When there is no change in the objects, serial is not incremented
When there is no change in the objects, serial is not incremented:
>>> dm.reset()
>>> foo4 = dm.load(foo._p_oid)
>>> dm._registered_objects = [foo4.bar, foo4]
Expand Down Expand Up @@ -911,6 +928,126 @@ def doctest_MongoDataManager_sortKey():
('MongoDataManager', 0)
"""


def doctest_MongoDataManager_sub_objects():
    r"""MongoDataManager: Properly handling initialization of sub-objects.

    When `_p_mongo_sub_object` objects are loaded from Mongo, their `_p_jar`
    and, more importantly, their `_p_mongo_doc_object` attributes are set.

    However, when a sub-object is initially added, those attributes are
    missing.

      >>> foo = Foo('one')
      >>> dm.root['one'] = foo

      >>> dm.tpc_finish(None)

      >>> foo = dm.root['one']
      >>> foo._p_changed

      >>> foo.list = serialize.PersistentList()
      >>> foo.list._p_jar
      >>> getattr(foo.list, '_p_mongo_doc_object', 'Missing')
      'Missing'

    Of course, the parent object has changed, since an attribute has been set
    on it.

      >>> foo._p_changed
      True

    Now, since we are dealing with an external database and queries, it
    frequently happens that all changed objects are flushed to the database
    before running a query. In our case, this saves the main object and marks
    it unchanged again:

      >>> dm.flush()
      >>> foo._p_changed
      False

    However, while flushing, no object is read from the database again. If
    the jar and document object are not set on the sub-object, any changes to
    it would not be seen. Thus, the serialization process *must* assign the
    jar and document object attributes, if not set.

      >>> foo.list._p_jar is dm
      True
      >>> foo.list._p_mongo_doc_object is foo
      True

    Let's now ensure that changing the sub-object will have the proper effect:

      >>> foo.list.append(1)
      >>> foo.list._p_changed
      True

      >>> dm.tpc_finish(None)
      >>> foo = dm.root['one']
      >>> foo.list
      [1]

    Note: Most of the implementation of this feature is in the `get_state()`
    method of the `ObjectWriter` class.
    """


def doctest_MongoDataManager_collection_sharing():
    r"""MongoDataManager: Properly share collections with sub-classes.

    When objects do not specify a collection, then a collection based on the
    class path is created for them. In that case, when a sub-class is
    created, the same collection should be used. However, during
    de-serialization, it is important that we select the correct class to
    use.

      >>> dm.root['app'] = Root()

      >>> dm.root['app'].one = Super('one')
      >>> dm.root['app'].one
      <Super one>

      >>> dm.root['app'].two = Sub('two')
      >>> dm.root['app'].two
      <Sub two>

      >>> dm.root['app'].three = Sub('three')
      >>> dm.root['app'].three
      <Sub three>

      >>> dm.tpc_finish(None)

    Let's now load everything again:

      >>> dm.root['app'].one
      <Super one>
      >>> dm.root['app'].two
      <Sub two>
      >>> dm.root['app'].three
      <Sub three>

      >>> dm.tpc_finish(None)

    Make sure that after a restart, the objects can still be stored.
    Clearing the module-level caches simulates a fresh process:

      >>> serialize.COLLECTIONS_WITH_TYPE = set()
      >>> serialize.AVAILABLE_NAME_MAPPINGS = set()
      >>> serialize.PATH_RESOLVE_CACHE = {}
      >>> del Sub._p_mongo_store_type

      >>> dm2 = datamanager.MongoDataManager(
      ...     conn, default_database = DBNAME, root_database = DBNAME)

      >>> dm2.root['app'].four = Sub('four')
      >>> dm2.tpc_finish(None)

      >>> serialize.COLLECTIONS_WITH_TYPE = set()
      >>> serialize.AVAILABLE_NAME_MAPPINGS = set()
      >>> serialize.PATH_RESOLVE_CACHE = {}

      >>> dm2.root['app'].four
      <Sub four>
    """


def doctest_process_spec():
r"""process_spec(): General test
Expand Down
2 changes: 1 addition & 1 deletion src/mongopersist/tests/test_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def doctest_ObjectWriter_get_state_Persistent():
>>> top2 = Top()
>>> top2._p_mongo_sub_object = True
>>> writer.get_state(top2)
>>> writer.get_state(top2, top)
{'_py_persistent_type': 'mongopersist.tests.test_serialize.Top'}
"""

Expand Down
Loading

0 comments on commit e949ab5

Please sign in to comment.