Skip to content

Commit

Permalink
WT-8747 Reading between commit_timestamp and durable_timestamp can produce inconsistency (#7600)
Browse files Browse the repository at this point in the history

Test changes to avoid reading between the commit and durable timestamps of updates, except when we want to do it on purpose in durable_ts03 and hs06.

Document the issue. For now at least it's the application's problem.
  • Loading branch information
sauclovian-wt committed Feb 25, 2022
1 parent 9aefe65 commit bafff54
Show file tree
Hide file tree
Showing 21 changed files with 152 additions and 109 deletions.
11 changes: 11 additions & 0 deletions src/docs/arch-timestamp.dox
Expand Up @@ -289,6 +289,17 @@ Reads between the prepare timestamp and commit timestamp of a
transaction that has been prepared but not committed fail with
::WT_PREPARE_CONFLICT.

Reads between the commit timestamp and durable timestamp of a
transaction that has been committed but is not yet stable (that is,
the stable timestamp is at or past the commit timestamp but has not
yet advanced to the durable timestamp) are potentially unsafe.
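If a second transaction performs such a read and then commits with an
<i>earlier</i> durable timestamp, and a checkpoint is taken while the stable
timestamp is at or past the second transaction's durable timestamp but has
not yet reached the first transaction's durable timestamp, that checkpoint
contains the second transaction but not the first, and is therefore
inconsistent.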
Avoiding this inconsistency is, for the time being at least, the
application's responsibility.

See @ref arch-transaction-prepare for further discussion of prepared
transactions.

Expand Down
19 changes: 19 additions & 0 deletions src/docs/timestamp-prepare.dox
Expand Up @@ -47,6 +47,25 @@ transactions are higher-level MongoDB operations, requiring cluster-level
consensus on durability. Applications without similar requirements for prepared
transactions should set the durable and commit timestamps to the same time.

When a transaction has a durable timestamp later than its commit timestamp,
reading its writes in a second transaction and then committing other writes such
that the second transaction becomes durable before the first can produce data
inconsistency.
In this scenario the second transaction depends on the first: it must be
rolled back if the first transaction is rolled back, and therefore it must not
become durable before the first transaction.
Applications that create gaps between their commit timestamps and durable
timestamps are responsible for either not reading in those gaps, or establishing
an ordering for the durable timestamps of their commits to make sure that
this scenario cannot occur.
(Note that for the purposes of this issue the commit timestamp of a non-prepared
transaction is also its durable timestamp, and committing with no timestamp is
roughly comparable to committing at the current stable timestamp.)

\warning This scenario is not currently detected by WiredTiger; applications are
responsible for avoiding it.
In future versions such transactions might fail.

Prepared transactions have their own configuration keyword for rounding
timestamps; see @ref timestamp_roundup for more information.

Expand Down
2 changes: 1 addition & 1 deletion test/suite/test_durable_rollback_to_stable.py
Expand Up @@ -99,7 +99,7 @@ def test_durable_rollback_to_stable(self):

# Read the first update value with timestamp.
self.assertEquals(cursor.reset(), 0)
session.begin_transaction('read_timestamp=' + self.timestamp_str(200))
session.begin_transaction('read_timestamp=' + self.timestamp_str(220))
self.assertEquals(cursor.next(), 0)
for i in range(1, 50):
self.assertEquals(cursor.get_value(), ds.value(111))
Expand Down
2 changes: 1 addition & 1 deletion test/suite/test_durable_ts01.py
Expand Up @@ -98,7 +98,7 @@ def test_durable_ts01(self):

# Read the first update value with timestamp.
self.assertEquals(cursor.reset(), 0)
session.begin_transaction('read_timestamp=' + self.timestamp_str(200))
session.begin_transaction('read_timestamp=' + self.timestamp_str(220))
self.assertEquals(cursor.next(), 0)
for i in range(1, 50):
self.assertEquals(cursor.get_value(), ds.value(111))
Expand Down
9 changes: 8 additions & 1 deletion test/suite/test_durable_ts03.py
Expand Up @@ -96,9 +96,16 @@ def test_durable_ts03(self):
self.assertEqual(value, valueA)
session.commit_transaction()

# Read the updated data to confirm that it is visible.
# Check that the updated data can still be read even while it is not yet durable.
self.assertEquals(cursor.reset(), 0)
session.begin_transaction('read_timestamp=' + self.timestamp_str(210))
for key, value in cursor:
self.assertEqual(value, valueB)
session.rollback_transaction()

# Read the updated data to confirm that it is visible.
self.assertEquals(cursor.reset(), 0)
session.begin_transaction('read_timestamp=' + self.timestamp_str(220))
for key, value in cursor:
self.assertEqual(value, valueB)
session.commit_transaction()
Expand Down
38 changes: 19 additions & 19 deletions test/suite/test_flcs02.py
Expand Up @@ -30,7 +30,7 @@
from wtdataset import SimpleDataSet
from wtscenario import make_scenarios

# test_flcs01.py
# test_flcs02.py
#
# Test various cases of deleting values and expecting them to read back as 0,
# in the presence of timestamps and history.
Expand All @@ -39,7 +39,7 @@
# evict it explicitly, to make sure that the first section of the test exercises
# in-memory update records. (Testing on an in-memory database does not have that
# effect.)
class test_flcs01(wttest.WiredTigerTestCase):
class test_flcs02(wttest.WiredTigerTestCase):
prepare_values = [
('no_prepare', dict(do_prepare=False)),
('prepare', dict(do_prepare=True))
Expand Down Expand Up @@ -134,21 +134,20 @@ def delete_readback_commit(self, cursor, k, readts, committs):
cursor.reset()
self.session.rollback_transaction()

self.session.begin_transaction('read_timestamp=' + self.timestamp_str(committs))
v = cursor[k]
self.assertEqual(v, 0)
cursor.reset()
self.check_next(cursor, k, 0)
self.check_prev(cursor, k, 0)
self.session.rollback_transaction()
def readat(readts):
self.session.begin_transaction('read_timestamp=' + self.timestamp_str(readts))
v = cursor[k]
self.assertEqual(v, 0)
cursor.reset()
self.check_next(cursor, k, 0)
self.check_prev(cursor, k, 0)
self.session.rollback_transaction()

self.session.begin_transaction('read_timestamp=' + self.timestamp_str(committs + 5))
v = cursor[k]
self.assertEqual(v, 0)
cursor.reset()
self.check_next(cursor, k, 0)
self.check_prev(cursor, k, 0)
self.session.rollback_transaction()
if not self.do_prepare:
# Avoid reading between commit and durable.
readat(committs)
readat(committs+1)
readat(committs+5)

def test_flcs(self):
uri = "table:test_flcs02"
Expand Down Expand Up @@ -262,7 +261,8 @@ def test_flcs(self):
self.session.rollback_transaction()

# This should definitely have extended the table in the present.
self.session.begin_transaction('read_timestamp=' + self.timestamp_str(50))
read_ts = 51 if self.do_prepare else 50
self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts))
v = cursor[appendkey2]
self.assertEqual(v, 0)
cursor.reset()
Expand All @@ -274,15 +274,15 @@ def test_flcs(self):
self.evict(uri, 1, 1)

# The committed zeros should still be there.
self.session.begin_transaction('read_timestamp=' + self.timestamp_str(50))
self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts))
v = cursor[updatekey2]
self.assertEqual(v, 0)
cursor.reset()
self.check_next(cursor, updatekey2, 0)
self.check_prev(cursor, updatekey2, 0)
self.session.rollback_transaction()

self.session.begin_transaction('read_timestamp=' + self.timestamp_str(50))
self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts))
v = cursor[appendkey2]
self.assertEqual(v, 0)
cursor.reset()
Expand Down
6 changes: 6 additions & 0 deletions test/suite/test_hs06.py
Expand Up @@ -266,11 +266,17 @@ def test_hs_prepare_reads(self):
prepare_session.commit_transaction(
'commit_timestamp=' + self.timestamp_str(5) + ',durable_timestamp=' + self.timestamp_str(6))

# Specifically check that we can read between commit and durable.
self.session.begin_transaction('read_timestamp=' + self.timestamp_str(5))
for i in range(1, 11):
self.assertEquals(value2, cursor[self.create_key(i)])
self.session.rollback_transaction()

self.session.begin_transaction('read_timestamp=' + self.timestamp_str(6))
for i in range(1, 11):
self.assertEquals(value2, cursor[self.create_key(i)])
self.session.rollback_transaction()

def test_hs_multiple_updates(self):
# Create a small table.
uri = "table:test_hs06"
Expand Down
4 changes: 2 additions & 2 deletions test/suite/test_rollback_to_stable01.py
Expand Up @@ -229,12 +229,12 @@ def test_rollback_to_stable(self):

self.large_updates(uri, valuea, ds, nrows, self.prepare, 10)
# Check that all updates are seen.
self.check(valuea, uri, nrows, None, 10)
self.check(valuea, uri, nrows, None, 11 if self.prepare else 10)

# Remove all keys with newer timestamp.
self.large_removes(uri, ds, nrows, self.prepare, 20)
# Check that no keys are visible.
self.check(valuea, uri, 0, nrows, 20)
self.check(valuea, uri, 0, nrows, 21 if self.prepare else 20)

# Pin stable to timestamp 20 if prepare otherwise 10.
if self.prepare:
Expand Down
8 changes: 4 additions & 4 deletions test/suite/test_rollback_to_stable02.py
Expand Up @@ -98,19 +98,19 @@ def test_rollback_to_stable(self):

self.large_updates(uri, valuea, ds, nrows, self.prepare, 10)
# Check that all updates are seen.
self.check(valuea, uri, nrows, None, 10)
self.check(valuea, uri, nrows, None, 11 if self.prepare else 10)

self.large_updates(uri, valueb, ds, nrows, self.prepare, 20)
# Check that the new updates are only seen after the update timestamp.
self.check(valueb, uri, nrows, None, 20)
self.check(valueb, uri, nrows, None, 21 if self.prepare else 20)

self.large_updates(uri, valuec, ds, nrows, self.prepare, 30)
# Check that the new updates are only seen after the update timestamp.
self.check(valuec, uri, nrows, None, 30)
self.check(valuec, uri, nrows, None, 31 if self.prepare else 30)

self.large_updates(uri, valued, ds, nrows, self.prepare, 40)
# Check that the new updates are only seen after the update timestamp.
self.check(valued, uri, nrows, None, 40)
self.check(valued, uri, nrows, None, 41 if self.prepare else 40)

# Pin stable to timestamp 30 if prepare otherwise 20.
if self.prepare:
Expand Down
6 changes: 3 additions & 3 deletions test/suite/test_rollback_to_stable03.py
Expand Up @@ -85,15 +85,15 @@ def test_rollback_to_stable(self):

self.large_updates(uri, valuea, ds, nrows, self.prepare, 10)
# Check that all updates are seen.
self.check(valuea, uri, nrows, None, 10)
self.check(valuea, uri, nrows, None, 11 if self.prepare else 10)

self.large_updates(uri, valueb, ds, nrows, self.prepare, 20)
# Check that all updates are seen.
self.check(valueb, uri, nrows, None, 20)
self.check(valueb, uri, nrows, None, 21 if self.prepare else 20)

self.large_updates(uri, valuec, ds, nrows, self.prepare, 30)
# Check that all updates are seen.
self.check(valuec, uri, nrows, None, 30)
self.check(valuec, uri, nrows, None, 31 if self.prepare else 30)

# Pin stable to timestamp 30 if prepare otherwise 20.
if self.prepare:
Expand Down
26 changes: 13 additions & 13 deletions test/suite/test_rollback_to_stable04.py
Expand Up @@ -123,19 +123,19 @@ def test_rollback_to_stable(self):
self.large_modifies(uri, 'Z', ds, 7, 1, nrows, self.prepare, 140)

# Verify data is visible and correct.
self.check(value_a, uri, nrows, None, 20)
self.check(value_modQ, uri, nrows, None, 30)
self.check(value_modR, uri, nrows, None, 40)
self.check(value_modS, uri, nrows, None, 50)
self.check(value_b, uri, nrows, None, 60)
self.check(value_c, uri, nrows, None, 70)
self.check(value_modT, uri, nrows, None, 80)
self.check(value_d, uri, nrows, None, 90)
self.check(value_modW, uri, nrows, None, 100)
self.check(value_a, uri, nrows, None, 110)
self.check(value_modX, uri, nrows, None, 120)
self.check(value_modY, uri, nrows, None, 130)
self.check(value_modZ, uri, nrows, None, 140)
self.check(value_a, uri, nrows, None, 21 if self.prepare else 20)
self.check(value_modQ, uri, nrows, None, 31 if self.prepare else 30)
self.check(value_modR, uri, nrows, None, 41 if self.prepare else 40)
self.check(value_modS, uri, nrows, None, 51 if self.prepare else 50)
self.check(value_b, uri, nrows, None, 61 if self.prepare else 60)
self.check(value_c, uri, nrows, None, 71 if self.prepare else 70)
self.check(value_modT, uri, nrows, None, 81 if self.prepare else 80)
self.check(value_d, uri, nrows, None, 91 if self.prepare else 90)
self.check(value_modW, uri, nrows, None, 101 if self.prepare else 100)
self.check(value_a, uri, nrows, None, 111 if self.prepare else 110)
self.check(value_modX, uri, nrows, None, 121 if self.prepare else 120)
self.check(value_modY, uri, nrows, None, 131 if self.prepare else 130)
self.check(value_modZ, uri, nrows, None, 141 if self.prepare else 140)

# Pin stable to timestamp 40 if prepare otherwise 30.
if self.prepare:
Expand Down
8 changes: 4 additions & 4 deletions test/suite/test_rollback_to_stable06.py
Expand Up @@ -92,10 +92,10 @@ def test_rollback_to_stable(self):
self.large_updates(uri, value_d, ds, nrows, self.prepare, 50)

# Verify data is visible and correct.
self.check(value_a, uri, nrows, None, 20)
self.check(value_b, uri, nrows, None, 30)
self.check(value_c, uri, nrows, None, 40)
self.check(value_d, uri, nrows, None, 50)
self.check(value_a, uri, nrows, None, 21 if self.prepare else 20)
self.check(value_b, uri, nrows, None, 31 if self.prepare else 30)
self.check(value_c, uri, nrows, None, 41 if self.prepare else 40)
self.check(value_d, uri, nrows, None, 51 if self.prepare else 50)

# Checkpoint to ensure the data is flushed, then rollback to the stable timestamp.
if not self.in_memory:
Expand Down
14 changes: 7 additions & 7 deletions test/suite/test_rollback_to_stable07.py
Expand Up @@ -86,10 +86,10 @@ def test_rollback_to_stable(self):
self.large_updates(uri, value_a, ds, nrows, self.prepare, 50)

# Verify data is visible and correct.
self.check(value_d, uri, nrows, None, 20)
self.check(value_c, uri, nrows, None, 30)
self.check(value_b, uri, nrows, None, 40)
self.check(value_a, uri, nrows, None, 50)
self.check(value_d, uri, nrows, None, 21 if self.prepare else 20)
self.check(value_c, uri, nrows, None, 31 if self.prepare else 30)
self.check(value_b, uri, nrows, None, 41 if self.prepare else 40)
self.check(value_a, uri, nrows, None, 51 if self.prepare else 50)

# Pin stable to timestamp 50 if prepare otherwise 40.
if self.prepare:
Expand All @@ -106,9 +106,9 @@ def test_rollback_to_stable(self):
self.session.checkpoint()

# Verify additional update data is visible and correct.
self.check(value_b, uri, nrows, None, 60)
self.check(value_c, uri, nrows, None, 70)
self.check(value_d, uri, nrows, None, 80)
self.check(value_b, uri, nrows, None, 61 if self.prepare else 60)
self.check(value_c, uri, nrows, None, 71 if self.prepare else 70)
self.check(value_d, uri, nrows, None, 81 if self.prepare else 80)

# Simulate a server crash and restart.
simulate_crash_restart(self, ".", "RESTART")
Expand Down
8 changes: 4 additions & 4 deletions test/suite/test_rollback_to_stable08.py
Expand Up @@ -92,10 +92,10 @@ def test_rollback_to_stable(self):
self.large_updates(uri, value_d, ds, nrows, self.prepare, 50)

# Verify data is visible and correct.
self.check(value_a, uri, nrows, None, 20)
self.check(value_b, uri, nrows, None, 30)
self.check(value_c, uri, nrows, None, 40)
self.check(value_d, uri, nrows, None, 50)
self.check(value_a, uri, nrows, None, 21 if self.prepare else 20)
self.check(value_b, uri, nrows, None, 31 if self.prepare else 30)
self.check(value_c, uri, nrows, None, 41 if self.prepare else 40)
self.check(value_d, uri, nrows, None, 51 if self.prepare else 50)

# Pin stable to timestamp 60 if prepare otherwise 50.
if self.prepare:
Expand Down
34 changes: 17 additions & 17 deletions test/suite/test_rollback_to_stable10.py
Expand Up @@ -104,15 +104,15 @@ def test_rollback_to_stable(self):
self.large_updates(uri_2, value_a, ds_2, nrows, self.prepare, 50)

# Verify data is visible and correct.
self.check(value_d, uri_1, nrows, None, 20)
self.check(value_c, uri_1, nrows, None, 30)
self.check(value_b, uri_1, nrows, None, 40)
self.check(value_a, uri_1, nrows, None, 50)
self.check(value_d, uri_1, nrows, None, 21 if self.prepare else 20)
self.check(value_c, uri_1, nrows, None, 31 if self.prepare else 30)
self.check(value_b, uri_1, nrows, None, 41 if self.prepare else 40)
self.check(value_a, uri_1, nrows, None, 51 if self.prepare else 50)

self.check(value_d, uri_2, nrows, None, 20)
self.check(value_c, uri_2, nrows, None, 30)
self.check(value_b, uri_2, nrows, None, 40)
self.check(value_a, uri_2, nrows, None, 50)
self.check(value_d, uri_2, nrows, None, 21 if self.prepare else 20)
self.check(value_c, uri_2, nrows, None, 31 if self.prepare else 30)
self.check(value_b, uri_2, nrows, None, 41 if self.prepare else 40)
self.check(value_a, uri_2, nrows, None, 51 if self.prepare else 50)

# Pin stable to timestamp 60 if prepare otherwise 50.
if self.prepare:
Expand Down Expand Up @@ -234,15 +234,15 @@ def test_rollback_to_stable_prepare(self):
self.large_updates(uri_2, value_a, ds_2, nrows, self.prepare, 50)

# Verify data is visible and correct.
self.check(value_d, uri_1, nrows, None, 20)
self.check(value_c, uri_1, nrows, None, 30)
self.check(value_b, uri_1, nrows, None, 40)
self.check(value_a, uri_1, nrows, None, 50)

self.check(value_d, uri_2, nrows, None, 20)
self.check(value_c, uri_2, nrows, None, 30)
self.check(value_b, uri_2, nrows, None, 40)
self.check(value_a, uri_2, nrows, None, 50)
self.check(value_d, uri_1, nrows, None, 21 if self.prepare else 20)
self.check(value_c, uri_1, nrows, None, 31 if self.prepare else 30)
self.check(value_b, uri_1, nrows, None, 41 if self.prepare else 40)
self.check(value_a, uri_1, nrows, None, 51 if self.prepare else 50)

self.check(value_d, uri_2, nrows, None, 21 if self.prepare else 20)
self.check(value_c, uri_2, nrows, None, 31 if self.prepare else 30)
self.check(value_b, uri_2, nrows, None, 41 if self.prepare else 40)
self.check(value_a, uri_2, nrows, None, 51 if self.prepare else 50)

# Pin stable to timestamp 60 if prepare otherwise 50.
if self.prepare:
Expand Down
4 changes: 2 additions & 2 deletions test/suite/test_rollback_to_stable11.py
Expand Up @@ -83,7 +83,7 @@ def test_rollback_to_stable(self):
self.large_updates(uri, value_b, ds, nrows, self.prepare, 20)

# Verify data is visible and correct.
self.check(value_b, uri, nrows, None, 20)
self.check(value_b, uri, nrows, None, 21 if self.prepare else 20)

# Pin stable to timestamp 28 if prepare otherwise 20.
# large_updates() prepares at 1 before the timestamp passed (so 29)
Expand All @@ -109,7 +109,7 @@ def test_rollback_to_stable(self):
self.large_updates(uri, value_d, ds, nrows, self.prepare, 30)

# Verify data is visible and correct.
self.check(value_d, uri, nrows, None, 30)
self.check(value_d, uri, nrows, None, 31 if self.prepare else 30)

# Checkpoint to ensure that all the updates are flushed to disk.
self.session.checkpoint()
Expand Down

0 comments on commit bafff54

Please sign in to comment.