From 8160568b246b55d7633ecab9ec743699c06e24d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Perrin?= Date: Mon, 4 Mar 2024 21:38:45 +0900 Subject: [PATCH] FileStorage: fix rare data corruption when using restore after multiple undos (#395) * FileStorage: fix data corruption when using restore after multiple undos The case of a history like this: - T0 initialize an object in state 0 - T1 modifies object to state 1 - T2 modifies object to state 2 - T3 undo T2 and T1, bringing back to state 0 - T4 modified object to state 3 - T5 undo T4, bringing back object to state 0 was not correct after `restore`: the state is 1 instead of the expected 0. This is because T3 contains two data records: - an undo of T2, with a backpointer to the data of state 1 - an undo of T1, with a backpointer to the data of state 0 When restoring T5 (the undo of T4), the transaction is made of one data record, with a backpointer that is copied from the backpointer from T3, but this uses backpointer from the first record for this object, which is incorrect, in such a case where there is more than one backpointer for the same oid, we need to iterate in all data record to find the oldest version. Co-authored-by: Kirill Smelkov --- CHANGES.rst | 3 ++ src/ZODB/FileStorage/FileStorage.py | 46 +++++++++++++++++++---------- src/ZODB/tests/RecoveryStorage.py | 40 +++++++++++++++++++++++++ src/ZODB/tests/testFileStorage.py | 4 +++ 4 files changed, 77 insertions(+), 16 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 5928eaf32..8b3791c39 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,6 +11,9 @@ - Fix sorting issue in ``scripts/space.py``. +- FileStorage: fix a rare data corruption when using restore after multiple undos. + For details see `#395 `_. + - Fix exit code of ``repozo`` script in case of verification error. For details see `#396 `_. diff --git a/src/ZODB/FileStorage/FileStorage.py b/src/ZODB/FileStorage/FileStorage.py index a3afec596..e6d4e76f4 100644 --- a/src/ZODB/FileStorage/FileStorage.py +++ b/src/ZODB/FileStorage/FileStorage.py @@ -663,6 +663,10 @@ def _data_find(self, tpos, oid, data): # Else oid's data record contains the data, and the file offset of # oid's data record is returned. This data record should contain # a pickle identical to the 'data' argument. + # When looking for oid's data record we scan all data records in + # the transaction till the end looking for the latest record with oid, + # even if there are multiple such records. This matches load behaviour + # which uses the data record created by last store. # Unclear: If the length of the stored data doesn't match len(data), # an exception is raised. If the lengths match but the data isn't @@ -672,28 +676,38 @@ def _data_find(self, tpos, oid, data): tid, tl, status, ul, dl, el = unpack(TRANS_HDR, h) status = as_text(status) self._file.read(ul + dl + el) - tend = tpos + tl + 8 + tend = tpos + tl pos = self._file.tell() + data_hdr = None + data_pos = 0 + # scan all data records in this transaction looking for the latest + # record with our oid while pos < tend: h = self._read_data_header(pos) if h.oid == oid: - # Make sure this looks like the right data record - if h.plen == 0: - # This is also a backpointer. Gotta trust it. - return pos - if h.plen != len(data): - # The expected data doesn't match what's in the - # backpointer. Something is wrong. - logger.error("Mismatch between data and" - " backpointer at %d", pos) - return 0 - _data = self._file.read(h.plen) - if data != _data: - return 0 - return pos + data_hdr = h + data_pos = pos pos += h.recordlen() self._file.seek(pos) - return 0 + if data_hdr is None: + return 0 + + # return position of found data record, but make sure this looks like + # the right data record to return. + if data_hdr.plen == 0: + # This is also a backpointer, Gotta trust it. + return data_pos + else: + if data_hdr.plen != len(data): + # The expected data doesn't match what's in the + # backpointer. Something is wrong. + logger.error("Mismatch between data and" + " backpointer at %d", pos) + return 0 + _data = self._file.read(data_hdr.plen) + if data != _data: + return 0 + return data_pos def restore(self, oid, serial, data, version, prev_txn, transaction): # A lot like store() but without all the consistency checks. This diff --git a/src/ZODB/tests/RecoveryStorage.py b/src/ZODB/tests/RecoveryStorage.py index ad2aa6735..80b2caede 100644 --- a/src/ZODB/tests/RecoveryStorage.py +++ b/src/ZODB/tests/RecoveryStorage.py @@ -199,3 +199,43 @@ def testRestoreWithMultipleObjectsInUndoRedo(self): # part of the test. self._dst.copyTransactionsFrom(self._storage) self.compare(self._storage, self._dst) + + def testRestoreWithMultipleUndoRedo(self): + db = DB(self._storage) + c = db.open() + r = c.root() + + # TO + r["obj"] = MinPO(0) + transaction.commit() + c.close() + + # T1: do 1 + r = db.open().root() + r["obj"].value = 1 + transaction.commit() + + # T2: do 2 + r = db.open().root() + r["obj"].value = 2 + transaction.commit() + + # T3: undo T1 and T2 + db.undoMultiple([u["id"] for u in db.undoLog(0, 2)]) + transaction.commit() + # obj will be a backpointer to T0 + + # T4: do 3 + r = db.open().root() + r["obj"].value = 3 + transaction.commit() + + # T4: undo + self._undo(self._storage.undoInfo()[0]['id']) + # obj will be a backpointer to T3, which is a backpointer to T0 + + r = db.open().root() + self.assertEqual(r["obj"].value, 0) + + self._dst.copyTransactionsFrom(self._storage) + self.compare(self._storage, self._dst) diff --git a/src/ZODB/tests/testFileStorage.py b/src/ZODB/tests/testFileStorage.py index 9d1fa1010..83753cc5f 100644 --- a/src/ZODB/tests/testFileStorage.py +++ b/src/ZODB/tests/testFileStorage.py @@ -456,6 +456,10 @@ def testRestoreAcrossPack(self): # Skip this check as it calls restore directly. pass + def testRestoreWithMultipleUndoRedo(self): + # This case is only supported with restore, not with store "emulation" + pass + class AnalyzeDotPyTest(StorageTestBase.StorageTestBase):