Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
- Feature: Whenever setattr() is called on a persistent object, i…
Browse files Browse the repository at this point in the history
…t is

  marked as changed even if the new value equals the old one. To minimize
  writes to MongoDB, the latest database state is compared to the new state
  and the new state is only written when changes are detected. A flag called
  ``serialize.IGNORE_IDENTICAL_DOCUMENTS`` (default: ``True``) is used to
  control the feature. (Experimental)
  • Loading branch information
strichter committed Mar 30, 2012
1 parent 98e0194 commit 1a2f4e1
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 18 deletions.
13 changes: 10 additions & 3 deletions CHANGES.txt
Expand Up @@ -5,8 +5,6 @@ CHANGES
0.7.0 (2012-03-??)
------------------

- Added transaction ID to LoggingDecorator

- Feature: A new ``IConflictHandler`` interface now controls all aspects of
conflict resolution. The following implementations are provided:

Expand All @@ -22,13 +20,20 @@ CHANGES
* ``ResolvingSerialConflictHandler``: Another serial handler, but it has the
ability to resolve a conflict. For this to happen, a persistent object
must implement ``_p_resolveConflict(orig_state, cur_state, new_state)``,
which returns the new, merged state.
which returns the new, merged state. (Experimental)

As a result, the ``detect_conflicts`` flag of the data manager was removed
and replaced with the ``conflict_handler`` attribute. One can pass in the
``conflict_handler_factory`` to the data manager constructor. The factory
needs to expect one argument, the data manager.

- Feature: Whenever ``setattr()`` is called on a persistent object, it is
marked as changed even if the new value equals the old one. To minimize
writes to MongoDB, the latest database state is compared to the new state
and the new state is only written when changes are detected. A flag called
``serialize.IGNORE_IDENTICAL_DOCUMENTS`` (default: ``True``) is used to
control the feature. (Experimental)

- Feature: ``ConflictError`` now has a much more meaningful API. Instead of
just referencing the object and different serials, it now actually has the
original, current and new state documents.
Expand All @@ -40,6 +45,8 @@ CHANGES
- Feature: Provide a flag to turn on MongoDB access logging. The flag is false
by default, since access logging is very expensive.

- Feature: Added transaction ID to LoggingDecorator.

- Bug: We have seen several occasions in production where we suddenly lost
some state in some documents, which prohibited the objects from being
loadable again. The cause was that the ``_original_states`` attribute did not
Expand Down
18 changes: 16 additions & 2 deletions src/mongopersist/conflict.py
Expand Up @@ -46,6 +46,9 @@ def on_after_store(self, obj, state):
def on_modified(self, obj):
pass

def is_same(self, obj, orig_state, new_state):
return orig_state == new_state

def has_conflicts(self, objs):
return False

Expand All @@ -67,14 +70,25 @@ def on_before_set_state(self, obj, state):

def on_before_store(self, obj, state):
state[self.field_name] = u64(getattr(obj, '_p_serial', 0)) + 1
obj._p_serial = p64(state[self.field_name])
# Do not set the object serial yet, since we might decide not to store
# after all.

def on_after_store(self, obj, state):
pass
obj._p_serial = p64(state[self.field_name])

def on_modified(self, obj):
pass

def is_same(self, obj, orig_state, new_state):
if orig_state is None:
# This should never happen in a real running system.
return False
orig_state = orig_state.copy()
orig_state.pop(self.field_name)
new_state = new_state.copy()
new_state.pop(self.field_name)
return orig_state == new_state

def resolve(self, obj, orig_doc, cur_doc, new_doc):
raise NotImplementedError

Expand Down
5 changes: 5 additions & 0 deletions src/mongopersist/datamanager.py
Expand Up @@ -185,7 +185,12 @@ def __init__(self, conn, default_database=None,
self._inserted_objects = []
self._modified_objects = []
self._removed_objects = []
# Keeps states as found at the beginning of the transaction.
self._original_states = {}
# The latest states written to the database. This is different to the
# original states, since changes can be flushed to the database
# multiple times per transaction.
self._latest_states = {}
self._needs_to_join = True
self._object_cache = {}
self.annotations = {}
Expand Down
5 changes: 5 additions & 0 deletions src/mongopersist/interfaces.py
Expand Up @@ -81,6 +81,11 @@ def on_after_store(obj, state):
def on_modified(obj):
"""Method called when an object is registered as modified."""

def is_same(obj, orig_state, new_state):
"""Compares two states of the object and determines whether they are
the same. It should only compare actual object fields and not any
meta-data fields."""

def has_conflicts(objs):
"""Checks whether any of the passed in objects have conflicts.
Expand Down
33 changes: 27 additions & 6 deletions src/mongopersist/serialize.py
Expand Up @@ -31,6 +31,8 @@
SERIALIZERS = []
OID_CLASS_LRU = lru.LRUCache(20000)

IGNORE_IDENTICAL_DOCUMENTS = True

def get_dotted_name(obj):
    """Return the dotted path ``<module>.<name>`` of ``obj``.

    ``obj`` must provide ``__module__`` and ``__name__`` attributes.
    """
    path_parts = (obj.__module__, obj.__name__)
    return '.'.join(path_parts)

Expand Down Expand Up @@ -239,20 +241,34 @@ def store(self, obj, ref_only=False):
# if needed.
self._jar.conflict_handler.on_before_store(obj, doc)

stored = False
if obj._p_oid is None:
doc_id = coll.insert(doc)
stored = True
obj._p_jar = self._jar
obj._p_oid = pymongo.dbref.DBRef(coll_name, doc_id, db_name)
# Make sure that any other code accessing this object in this
# session, gets the same instance.
self._jar._object_cache[doc_id] = obj
else:
doc['_id'] = obj._p_oid.id
coll.save(doc)

# A hook, so that the conflict handler can modify the object or state
# document after an object was stored.
self._jar.conflict_handler.on_after_store(obj, doc)
# We only want to store a new version of the document, if it is
# different. We have to delegate that task to the conflict
# handler, since it might know about meta-fields that need to be
# ignored.
orig_doc = self._jar._latest_states.get(obj._p_oid)
if (not IGNORE_IDENTICAL_DOCUMENTS or
not self._jar.conflict_handler.is_same(obj, orig_doc, doc)):
coll.save(doc)
stored = True

if stored:
# Make sure that the doc is added to the latest states.
self._jar._latest_states[obj._p_oid] = doc

# A hook, so that the conflict handler can modify the object or state
# document after an object was stored.
self._jar.conflict_handler.on_after_store(obj, doc)

return obj._p_oid

Expand Down Expand Up @@ -406,9 +422,14 @@ def set_ghost_state(self, obj, doc=None):
# Now store the original state. It is assumed that the state dict is
# not modified later.
# Make sure that we never set the original state multiple times, even
# if reassigning the state within the same transaction.
# if reassigning the state within the same transaction. Otherwise we
# can never fully undo a transaction.
if obj._p_oid not in self._jar._original_states:
self._jar._original_states[obj._p_oid] = doc
# Sometimes this method is called to update the object state
# before storage. Only update the latest states when the object is
# originally loaded.
self._jar._latest_states[obj._p_oid] = doc
# Set the state.
obj.__setstate__(state)

Expand Down
46 changes: 39 additions & 7 deletions src/mongopersist/tests/test_conflict.py
Expand Up @@ -76,6 +76,14 @@ def doctest_NoCheckConflictHandler_basic():
>>> obj, state
(<Foo 'one'>, {'name': 'one'})
There is a method that allows for comparing 2 states of a given
object. The method is used to detect whether objects really changed.
>>> handler.is_same(obj, {'name': 'one'}, {'name': 'one'})
True
>>> handler.is_same(obj, {'name': 'one'}, {'name': 'eins'})
False
Let's check the conflict checking methods:
>>> handler.has_conflicts([obj])
Expand Down Expand Up @@ -146,26 +154,50 @@ def doctest_SimpleSerialConflictHandler_basic():
>>> state
{'name': 'one'}
Before the object state is stored in Mongo, we add the serial by taking
the current one and add 1 to it:
Before the object state is stored in Mongo, we add the serial to the
document by taking the current one and adding 1 to it. Note that the object's
serial is not changed yet, since storing the document might still be
cancelled (for example by detecting that the DB state equals the new
state):
>>> state = {'name': 'one'}
>>> handler.on_before_store(obj, state)
>>> obj._p_serial
'\x00\x00\x00\x00\x00\x00\x00\x05'
>>> state
{'_py_serial': 6, 'name': 'one'}
The event handlers after store and on modification do not need to do
anything:
After the document was stored, we can safely update the object as well.
>>> state = {'name': 'one'}
>>> handler.on_after_store(obj, state)
>>> obj, state
(<Foo 'one'>, {'name': 'one'})
>>> obj._p_serial
'\x00\x00\x00\x00\x00\x00\x00\x06'
>>> state
{'_py_serial': 6, 'name': 'one'}
The event handler on modification does not need to do anything:
>>> handler.on_modified(obj)
>>> obj
<Foo 'one'>
There is a method that allows for comparing 2 states of a given
object. The method is used to detect whether objects really changed.
>>> handler.is_same(
... obj,
... {'name': 'one', '_py_serial': 1},
... {'name': 'one', '_py_serial': 2})
True
>>> handler.is_same(
... obj,
... {'name': 'one', '_py_serial': 1},
... {'name': 'eins', '_py_serial': 2})
False
As you can see, the serial number is omitted from the comparison, because
it does not represent part of the object state, but is state meta-data.
Let's check the conflict checking methods now. Initially, there are no
conflicts:
Expand Down
64 changes: 64 additions & 0 deletions src/mongopersist/tests/test_datamanager.py
Expand Up @@ -201,6 +201,70 @@ def doctest_MongoDataManager_object_dump_load_reset():
>>> foo._p_oid = foo2._p_oid
"""

def doctest_MongoDataManager_dump_only_on_real_change():
r"""MongoDataManager: dump(): dump on real change only.
The data manager only writes data when we actually have a difference in
state.
We have to use a serial conflict handler, otherwise it is hard to check
whether data was written.
>>> dm.conflict_handler = conflict.SimpleSerialConflictHandler(dm)
Let's now add an object:
>>> foo = Foo('foo')
>>> foo_ref = dm.insert(foo)
>>> dm.tpc_finish(None)
>>> coll = dm._get_collection_from_object(foo)
>>> coll.find_one({})
{u'_id': ObjectId('...'), u'_py_serial': 1, u'name': u'foo'}
So the original state is in. Let's now modify an object:
>>> foo = dm.load(foo_ref)
>>> foo.name = 'Foo'
>>> foo._p_changed
True
>>> dm.tpc_finish(None)
>>> coll.find_one({})
{u'_id': ObjectId('...'), u'_py_serial': 2, u'name': u'Foo'}
If we now modify the object again, but write the same value, the state
should not be written to Mongo.
>>> foo = dm.load(foo_ref)
>>> foo.name = 'Foo'
>>> foo._p_changed
True
>>> dm.tpc_finish(None)
>>> coll.find_one({})
{u'_id': ObjectId('...'), u'_py_serial': 2, u'name': u'Foo'}
Let's make sure everything also works when we flush the transaction in the
middle.
>>> foo = dm.load(foo_ref)
>>> foo.name = 'fuh'
>>> dm.flush()
>>> coll.find_one({})
{u'_id': ObjectId('...'), u'_py_serial': 3, u'name': u'fuh'}
>>> foo._p_changed
False
>>> foo.name = 'fuh'
>>> foo._p_changed
True
>>> dm.tpc_finish(None)
>>> coll.find_one({})
{u'_id': ObjectId('...'), u'_py_serial': 3, u'name': u'fuh'}
"""

def doctest_MongoDataManager_flush():
r"""MongoDataManager: flush()
Expand Down

0 comments on commit 1a2f4e1

Please sign in to comment.