Skip to content

Commit

Permalink
Make TransactionMetaData in charge of (de)serializing extension data
Browse files Browse the repository at this point in the history
IStorage implementations used to do this task themselves which leads to code
duplication and sometimes bugs (one was fixed recently in NEO). Like for object
serialization, this should be done by the upper layer (Connection).

This commit also provides a way to get raw extensions data while iterating
over transactions (this is actually the original purpose[2]). So far, extension
data could only be retrieved unpickled, which caused several problems:

- tools like `zodb dump` [1] cannot dump data exactly as stored on a
  storage. This makes database potentially not bit-to-bit identical to
  its original after restoring from such dump.

- `zodb dump` output could be changing from run to run on the same
  database. This comes from the fact that e.g. python dictionaries are
  unordered and so when pickling a dict back to bytes the result could
  be not the same as original.

  ( this problem can be worked-around partly to work reliably for e.g.
    dict with str keys - by always emitting items in key sorted order,
    but it is hard to make it work reliably for arbitrary types )

Both issues make it hard to verify integrity of database at the lowest
possible level after restoration, and make it hard to verify bit-to-bit
compatibility with non-python ZODB implementations.

For this, TransactionMetaData gets a new 'extension_bytes' attribute and
and common usage becames:

* Application committing a transaction:

  - 'extension' is set with a dictionary
  - the storage gets the bytes via 'extension_bytes'

* Iteration:

  - the storage passes bytes as 'extension' parameter of TransactionMetaData
  - the application can get extension data either as bytes ('extension_bytes')
    or deserialized ('extension'): in the former case, no deserialization
    happens and the returned value is exactly what was passed by the storage

Conversion between 'extension' and 'extension_bytes' is automatic and lazy,
so that these attributes remain writable.

[1] https://lab.nexedi.com/nexedi/zodbtools
[2] #183

Co-Authored-By: Kirill Smelkov <kirr@nexedi.com>
  • Loading branch information
jmuchemb and navytux committed May 31, 2018
1 parent 8f5ac63 commit 1298177
Show file tree
Hide file tree
Showing 9 changed files with 98 additions and 43 deletions.
8 changes: 2 additions & 6 deletions src/ZODB/BaseStorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from .Connection import TransactionMetaData
from .utils import z64, oid_repr, byte_ord, byte_chr, load_current
from .UndoLogCompatible import UndoLogCompatible
from ._compat import dumps, _protocol, py2_hasattr
from ._compat import py2_hasattr

log = logging.getLogger("ZODB.BaseStorage")

Expand Down Expand Up @@ -190,11 +190,7 @@ def tpc_begin(self, transaction, tid=None, status=' '):

user = transaction.user
desc = transaction.description
ext = transaction.extension
if ext:
ext = dumps(ext, _protocol)
else:
ext = ""
ext = transaction.extension_bytes

self._ude = user, desc, ext

Expand Down
34 changes: 30 additions & 4 deletions src/ZODB/Connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@
from ZODB import utils
import six

from ._compat import dumps, loads, _protocol
from .mvccadapter import HistoricalStorageAdapter

from . import valuedoc
from . import _compat

global_reset_counter = 0

Expand Down Expand Up @@ -1300,6 +1300,23 @@ def __repr__(self):
size.
"""

def _extension_property(func):
import inspect
name = func.__name__
alt, = inspect.getargspec(func).args
def fget(self):
try:
return self.__dict__[name]
except KeyError:
d = self.__dict__
value = d[name] = func(d[alt])
return value
def fset(self, value):
d = self.__dict__
d.pop(alt, None)
d[name] = value
return property(fget, fset)

@implementer(IStorageTransactionMetaData)
class TransactionMetaData(object):

Expand All @@ -1312,9 +1329,18 @@ def __init__(self, user=u'', description=u'', extension=b''):
description = description.encode('utf-8')
self.description = description

if not isinstance(extension, dict):
extension = _compat.loads(extension) if extension else {}
self.extension = extension
if isinstance(extension, bytes):
self.extension_bytes = extension
else:
self.extension = extension

@_extension_property
def extension(extension_bytes):
return loads(extension_bytes) if extension_bytes else {}

@_extension_property
def extension_bytes(extension):
return dumps(extension, _protocol) if extension else b''

def note(self, text): # for tests
text = text.strip()
Expand Down
9 changes: 1 addition & 8 deletions src/ZODB/FileStorage/FileStorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1988,15 +1988,8 @@ def __next__(self):

if h.status != "u":
pos = tpos + h.headerlen()
e = {}
if h.elen:
try:
e = loads(h.ext)
except:
pass

result = TransactionRecord(h.tid, h.status, h.user, h.descr,
e, pos, tend, self._file, tpos)
h.ext, pos, tend, self._file, tpos)

# Read the (intentionally redundant) transaction length
self._file.seek(tend)
Expand Down
8 changes: 2 additions & 6 deletions src/ZODB/fsrecover.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@
import ZODB.FileStorage
from ZODB.utils import u64, as_text
from ZODB.FileStorage import TransactionRecord
from ZODB._compat import loads

from persistent.TimeStamp import TimeStamp

Expand Down Expand Up @@ -143,12 +142,9 @@ def read_txn_header(f, pos, file_size, outp, ltid):
pos = tpos+(23+ul+dl+el)
user = f.read(ul)
description = f.read(dl)
if el:
try: e = loads(f.read(el))
except: e = {}
else: e = {}
ext = f.read(el)

result = TransactionRecord(tid, status, user, description, e, pos, tend,
result = TransactionRecord(tid, status, user, description, ext, pos, tend,
f, tpos)
pos = tend

Expand Down
5 changes: 5 additions & 0 deletions src/ZODB/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,11 +540,16 @@ class IStorageTransactionMetaData(Interface):
Note that unlike transaction.interfaces.ITransaction, the ``user``
and ``description`` attributes are bytes, not text.
Conversion between 'extension' and ''extension_bytes' is automatic.
Setting one of these attributes automatically updates the other.
"""
user = Attribute("Bytes transaction user")
description = Attribute("Bytes transaction Description")
extension = Attribute(
"A dictionary carrying a transaction's extended_info data")
extension_bytes = Attribute(
"Serialization of 'extension' attribute")


def set_data(ob, data):
Expand Down
33 changes: 26 additions & 7 deletions src/ZODB/tests/IteratorStorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ def iter_verify(self, txniter, revids, val0):

class IteratorStorage(IteratorCompare):

# Override this in tests for storages that delegate to TransactionMetaData
# the task to (de)serialize extension data.
use_extension_bytes = False

def checkSimpleIteration(self):
# Store a bunch of revisions of a single object
self._oid = oid = self._storage.new_oid()
Expand Down Expand Up @@ -85,14 +89,28 @@ def checkUndoZombie(self):
self.assertEqual(rec.data, None)

def checkTransactionExtensionFromIterator(self):
# It will be deserialized into a simple dict, which will be serialized
# differently. This simulates that 'dumps(loads(x), ...)' does not
# always return x.
class ext(dict):
def __reduce__(self):
return dict,(tuple(self.items()),)
ext = ext(foo=1)
oid = self._storage.new_oid()
revid = self._dostore(oid, data=MinPO(1))
iter = self._storage.iterator()
count = 0
for txn in iter:
self.assertEqual(txn.extension, {})
count +=1
self.assertEqual(count, 1)
revid = self._dostore(oid, data=MinPO(1), extension=ext)
txn, = self._storage.iterator()
self.assertEqual(txn.extension, ext)
try:
extension_bytes = txn.extension_bytes
except AttributeError:
# Volatile storages often don't serialize extension because it
# would be useless.
return
txn.extension = ext
if self.use_extension_bytes:
self.assertEqual(extension_bytes, txn.extension_bytes)
else:
self.assertNotEqual(extension_bytes, txn.extension_bytes)

def checkIterationIntraTransaction(self):
# TODO: Try this test with logging enabled. If you see something
Expand Down Expand Up @@ -215,6 +233,7 @@ def compare(self, storage1, storage2):
eq(txn1.user, txn2.user)
eq(txn1.description, txn2.description)
eq(txn1.extension, txn2.extension)
eq(txn1.extension_bytes, txn2.extension_bytes)
itxn1 = iter(txn1)
itxn2 = iter(txn2)
for rec1, rec2 in zip(itxn1, itxn2):
Expand Down
4 changes: 3 additions & 1 deletion src/ZODB/tests/StorageTestBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def tearDown(self):
ZODB.tests.util.TestCase.tearDown(self)

def _dostore(self, oid=None, revid=None, data=None,
already_pickled=0, user=None, description=None):
already_pickled=0, user=None, description=None, extension=None):
"""Do a complete storage transaction. The defaults are:
- oid=None, ask the storage for a new oid
Expand All @@ -149,6 +149,8 @@ def _dostore(self, oid=None, revid=None, data=None,
t.user = user
if description is not None:
t.description = description
if extension is not None:
t.extension = extension
try:
self._storage.tpc_begin(t)
# Store an object
Expand Down
11 changes: 10 additions & 1 deletion src/ZODB/tests/testFileStorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class FileStorageTests(
ReadOnlyStorage.ReadOnlyStorage
):

use_extension_bytes = True

def open(self, **kwargs):
self._storage = ZODB.FileStorage.FileStorage('FileStorageTests.fs',
**kwargs)
Expand All @@ -77,7 +79,14 @@ def checkLongMetadata(self):
except POSException.StorageError:
pass
else:
self.fail("expect long user field to raise error")
self.fail("expect long description field to raise error")
try:
self._dostore(extension={s: 1})
except POSException.StorageError:
pass
else:
self.fail("expect long extension field to raise error")


def check_use_fsIndex(self):

Expand Down
29 changes: 19 additions & 10 deletions src/ZODB/tests/test_TransactionMetaData.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,18 @@
import unittest
import warnings

from .._compat import dumps, loads, _protocol
from .._compat import dumps, loads
from ..Connection import TransactionMetaData

class TransactionMetaDataTests(unittest.TestCase):

def test_basic(self):
t = TransactionMetaData(
u'user\x80', u'description\x80', dict(foo='FOO'))
extension = dict(foo='FOO')
t = TransactionMetaData(u'user\x80', u'description\x80', extension)
self.assertEqual(t.user, b'user\xc2\x80')
self.assertEqual(t.description, b'description\xc2\x80')
self.assertEqual(t.extension, dict(foo='FOO'))
self.assertEqual(t.extension, extension)
self.assertEqual(loads(t.extension_bytes), extension)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
self.assertEqual(t._extension, t.extension)
Expand All @@ -33,12 +34,14 @@ def test_basic(self):
self.assertTrue("_extension is deprecated" in str(w[-1].message))

def test_basic_no_encoding(self):
t = TransactionMetaData(
b'user', b'description', dumps(dict(foo='FOO'), _protocol))
extension = dict(foo='FOO')
extension_bytes = dumps(extension)
t = TransactionMetaData(b'user', b'description', extension_bytes)
self.assertEqual(t.user, b'user')
self.assertEqual(t.description, b'description')
self.assertEqual(t.extension, dict(foo='FOO'))
self.assertEqual(t.extension, extension)
self.assertEqual(t._extension, t.extension)
self.assertEqual(t.extension_bytes, extension_bytes)

def test_constructor_default_args(self):
t = TransactionMetaData()
Expand All @@ -53,16 +56,22 @@ def test_set_extension(self):
self.assertEqual(t.description, b'')
self.assertEqual(t.extension, {})
self.assertEqual(t._extension, t.extension)
self.assertEqual(t.extension_bytes, b'')

for name in 'extension', '_extension':
data = {name: name + 'foo'}
setattr(t, name, data)
self.assertEqual(t.extension, data)
self.assertEqual(t._extension, t.extension)
data = {}
setattr(t, name, data)
self.assertEqual(t.extension, data)
extension_bytes = t.extension_bytes
self.assertEqual(loads(extension_bytes), data)
empty = {}
setattr(t, name, empty)
self.assertEqual(t.extension, empty)
self.assertEqual(t._extension, t.extension)
self.assertEqual(t.extension_bytes, b'')
t.extension_bytes = extension_bytes
self.assertEqual(t.extension, data)

def test_used_by_connection(self):
import ZODB
Expand Down

0 comments on commit 1298177

Please sign in to comment.