From 1400cafb69d22c5c6b1e354e4bff8fbb272d85cc Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Sat, 29 Jan 2022 05:03:47 -0500 Subject: [PATCH 01/20] Prohibit reading for updates that are committed but not stable. --- src/include/txn.h | 8 +++++--- src/include/txn_inline.h | 8 ++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/include/txn.h b/src/include/txn.h index 127ae0c2835..56e43260607 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -21,6 +21,7 @@ */ #define WT_TXN_ROLLBACK_REASON_CACHE "oldest pinned transaction ID rolled back for eviction" #define WT_TXN_ROLLBACK_REASON_CONFLICT "conflict between concurrent operations" +#define WT_TXN_ROLLBACK_REASON_NONDURABLE "read conflict with committed but non-durable value" /* AUTOMATIC FLAG VALUE GENERATION START 0 */ #define WT_TXN_LOG_CKPT_CLEANUP 0x01u @@ -42,9 +43,10 @@ /* AUTOMATIC FLAG VALUE GENERATION STOP 32 */ typedef enum { - WT_VISIBLE_FALSE = 0, /* Not a visible update */ - WT_VISIBLE_PREPARE = 1, /* Prepared update */ - WT_VISIBLE_TRUE = 2 /* A visible update */ + WT_VISIBLE_FALSE = 0, /* Not a visible update */ + WT_VISIBLE_PREPARE = 1, /* Prepared update */ + WT_VISIBLE_NONDURABLE = 2, /* Committed but not durable update */ + WT_VISIBLE_TRUE = 3 /* A visible update */ } WT_VISIBLE_TYPE; /* diff --git a/src/include/txn_inline.h b/src/include/txn_inline.h index 798818d7dab..291d81d35a1 100644 --- a/src/include/txn_inline.h +++ b/src/include/txn_inline.h @@ -778,6 +778,11 @@ __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd) return (WT_VISIBLE_TRUE); upd_visible = __wt_txn_visible(session, upd->txnid, upd->start_ts); + if (upd_visible && upd->durable_ts > upd->start_ts) { + upd_visible = __wt_txn_visible(session, upd->txnid, upd->durable_ts); + if (!upd_visible) + return (WT_VISIBLE_NONDURABLE); + } /* * The visibility check is only valid if the update does not change state. If the state does @@ -912,6 +917,9 @@ __wt_txn_read_upd_list_internal(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (upd_visible == WT_VISIBLE_TRUE) break; + if (upd_visible == WT_VISIBLE_NONDURABLE) + return (__wt_txn_rollback_required(session, WT_TXN_ROLLBACK_REASON_NONDURABLE)); + /* * Save the prepared update to help us detect if we race with prepared commit or rollback * irrespective of update visibility. From 13732cf67848a71b7d2dc65a7a988ae537cfaba7 Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Tue, 1 Feb 2022 03:24:10 -0500 Subject: [PATCH 02/20] Fixes. 1. Don't need to reject reads once stable reaches durable. (Whether or not the value is checkpointed; once it's stable nobody can commit and become durable before it does, so it's ok for them to read it.) This limits the possible problems to things happening shortly after commits. 2. Do need to check on-disk values; they can easily get evicted before they're stable. --- dist/s_string.ok | 1 + src/include/extern.h | 2 ++ src/include/txn_inline.h | 54 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/dist/s_string.ok b/dist/s_string.ok index d3cf93e200e..e92ea77abef 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -1180,6 +1180,7 @@ noclear nocrypto nolock nonces +nondurable nonliteral nontransactionally noop diff --git a/src/include/extern.h b/src/include/extern.h index 570c6fb3b64..f2f9d60eaf8 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1961,6 +1961,8 @@ static inline bool __wt_session_can_wait(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_split_descent_race(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_tw_start_nondurable(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_tw_start_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) diff --git a/src/include/txn_inline.h b/src/include/txn_inline.h index 291d81d35a1..391ae89c380 100644 --- a/src/include/txn_inline.h +++ b/src/include/txn_inline.h @@ -635,6 +635,40 @@ __wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) __wt_txn_visible(session, tw->start_txn, tw->start_ts)); } +/* + * __wt_txn_tw_start_nondurable -- + * Is the given start time window not yet durable? It is durable unless we are reading between + * its start time and its durable start time *and* the global stable timestamp has not yet + * advanced past the durable start time. (Note that it is not sufficient to just check that the + * global stable timestamp has advanced past the start time; prepared transactions are allowed + * to commit earlier than stable if they were prepared after stable and stable has since moved + * up.) Assumes caller has already checked that the start time is visible. + */ +static inline bool +__wt_txn_tw_start_nondurable(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) +{ + WT_TXN_GLOBAL *txn_global; + + txn_global = &S2C(session)->txn_global; + + /* If there is no gap between start time and durable start time, we can't read between them. */ + if (tw->durable_start_ts == tw->start_ts) + return (false); + + /* + * If the durable start time is stable, the value might or might not actually be durable + * (depending on whether a checkpoint has been completed) but we cannot become durable *before* + * it, so for us it counts as durable. The purpose of this check is to make sure we can't read a + * nondurable value, then commit and become durable before it, because that violates consistency + * expectations. + */ + if (tw->durable_start_ts <= txn_global->stable_timestamp) + return (false); + + /* Otherwise, if the durable start time is not visible, we are reading before it. */ + return (!__wt_txn_visible(session, tw->start_txn, tw->durable_start_ts)); +} + /* * __wt_txn_tw_start_visible_all -- * Is the given start time window visible to all (possible) readers? @@ -763,9 +797,12 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp static inline WT_VISIBLE_TYPE __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd) { + WT_TXN_GLOBAL *txn_global; uint8_t prepare_state, previous_state; bool upd_visible; + txn_global = &S2C(session)->txn_global; + for (;; __wt_yield()) { /* Prepare state change is in progress, yield and try again. */ WT_ORDERED_READ(prepare_state, upd->prepare_state); @@ -778,7 +815,8 @@ __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd) return (WT_VISIBLE_TRUE); upd_visible = __wt_txn_visible(session, upd->txnid, upd->start_ts); - if (upd_visible && upd->durable_ts > upd->start_ts) { + if (upd_visible && upd->durable_ts > upd->start_ts && + upd->durable_ts > txn_global->stable_timestamp) { upd_visible = __wt_txn_visible(session, upd->txnid, upd->durable_ts); if (!upd_visible) return (WT_VISIBLE_NONDURABLE); @@ -989,10 +1027,11 @@ __wt_txn_read( { WT_TIME_WINDOW tw; WT_UPDATE *prepare_upd, *restored_upd; - bool have_stop_tw, retry; + bool have_stop_tw, retry, return_onpage_value; prepare_upd = restored_upd = NULL; retry = true; + return_onpage_value = false; retry: WT_RET(__wt_txn_read_upd_list_internal(session, cbt, upd, &prepare_upd, &restored_upd)); @@ -1057,7 +1096,16 @@ __wt_txn_read( * 1. The record is from the history store. * 2. It is visible to the reader. */ - if (WT_IS_HS(session->dhandle) || __wt_txn_tw_start_visible(session, &tw)) { + if (WT_IS_HS(session->dhandle)) + return_onpage_value = true; + else if (__wt_txn_tw_start_visible(session, &tw)) { + if (__wt_txn_tw_start_nondurable(session, &tw)) + /* Disallow reading if we can see it but it isn't durable yet. */ + return (__wt_txn_rollback_required(session, WT_TXN_ROLLBACK_REASON_NONDURABLE)); + return_onpage_value = true; + } + + if (return_onpage_value) { if (cbt->upd_value->skip_buf) { cbt->upd_value->buf.data = NULL; cbt->upd_value->buf.size = 0; From 293957ab5f890284a6232dc283ad2227b22388ad Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Tue, 1 Feb 2022 03:25:48 -0500 Subject: [PATCH 03/20] Fix up assorted tests. Either move reads from between commit and durable to durable or after, or assert that reading in between fails, or both. --- test/suite/test_durable_rollback_to_stable.py | 2 +- test/suite/test_durable_ts01.py | 2 +- test/suite/test_durable_ts03.py | 10 ++++- test/suite/test_flcs02.py | 38 +++++++++---------- test/suite/test_flcs03.py | 8 +++- test/suite/test_hs06.py | 7 ++++ 6 files changed, 44 insertions(+), 23 deletions(-) diff --git a/test/suite/test_durable_rollback_to_stable.py b/test/suite/test_durable_rollback_to_stable.py index db00d289f80..7b8c3daf0b4 100644 --- a/test/suite/test_durable_rollback_to_stable.py +++ b/test/suite/test_durable_rollback_to_stable.py @@ -100,7 +100,7 @@ def test_durable_rollback_to_stable(self): # Read the first update value with timestamp. self.assertEquals(cursor.reset(), 0) - session.begin_transaction('read_timestamp=' + self.timestamp_str(200)) + session.begin_transaction('read_timestamp=' + self.timestamp_str(220)) self.assertEquals(cursor.next(), 0) for i in range(1, 50): self.assertEquals(cursor.get_value(), ds.value(111)) diff --git a/test/suite/test_durable_ts01.py b/test/suite/test_durable_ts01.py index 29284165d94..b1ca8fbde4e 100644 --- a/test/suite/test_durable_ts01.py +++ b/test/suite/test_durable_ts01.py @@ -99,7 +99,7 @@ def test_durable_ts01(self): # Read the first update value with timestamp. self.assertEquals(cursor.reset(), 0) - session.begin_transaction('read_timestamp=' + self.timestamp_str(200)) + session.begin_transaction('read_timestamp=' + self.timestamp_str(220)) self.assertEquals(cursor.next(), 0) for i in range(1, 50): self.assertEquals(cursor.get_value(), ds.value(111)) diff --git a/test/suite/test_durable_ts03.py b/test/suite/test_durable_ts03.py index 435688b27bf..7aedb40f8c8 100755 --- a/test/suite/test_durable_ts03.py +++ b/test/suite/test_durable_ts03.py @@ -97,9 +97,17 @@ def test_durable_ts03(self): self.assertEqual(value, valueA) session.commit_transaction() - # Read the updated data to confirm that it is visible. + # Check that the updated data cannot be read while it is not yet durable. self.assertEquals(cursor.reset(), 0) session.begin_transaction('read_timestamp=' + self.timestamp_str(210)) + cursor.set_key(1) + self.assertRaisesException(wiredtiger.WiredTigerError, + lambda: cursor.search(), '/WT_ROLLBACK/', False) + session.rollback_transaction() + + # Read the updated data to confirm that it is visible. + self.assertEquals(cursor.reset(), 0) + session.begin_transaction('read_timestamp=' + self.timestamp_str(220)) for key, value in cursor: self.assertEqual(value, valueB) session.commit_transaction() diff --git a/test/suite/test_flcs02.py b/test/suite/test_flcs02.py index 59376cefa43..0f723729f47 100644 --- a/test/suite/test_flcs02.py +++ b/test/suite/test_flcs02.py @@ -30,7 +30,7 @@ from wtdataset import SimpleDataSet from wtscenario import make_scenarios -# test_flcs01.py +# test_flcs02.py # # Test various cases of deleting values and expecting them to read back as 0, # in the presence of timestamps and history. @@ -39,7 +39,7 @@ # evict it explicitly, to make sure that the first section of the test exercises # in-memory update records. (Testing on an in-memory database does not have that # effect.) -class test_flcs01(wttest.WiredTigerTestCase): +class test_flcs02(wttest.WiredTigerTestCase): conn_config = 'in_memory=false' prepare_values = [ @@ -136,21 +136,20 @@ def delete_readback_commit(self, cursor, k, readts, committs): cursor.reset() self.session.rollback_transaction() - self.session.begin_transaction('read_timestamp=' + self.timestamp_str(committs)) - v = cursor[k] - self.assertEqual(v, 0) - cursor.reset() - self.check_next(cursor, k, 0) - self.check_prev(cursor, k, 0) - self.session.rollback_transaction() + def readat(readts): + self.session.begin_transaction('read_timestamp=' + self.timestamp_str(readts)) + v = cursor[k] + self.assertEqual(v, 0) + cursor.reset() + self.check_next(cursor, k, 0) + self.check_prev(cursor, k, 0) + self.session.rollback_transaction() - self.session.begin_transaction('read_timestamp=' + self.timestamp_str(committs + 5)) - v = cursor[k] - self.assertEqual(v, 0) - cursor.reset() - self.check_next(cursor, k, 0) - self.check_prev(cursor, k, 0) - self.session.rollback_transaction() + if not self.do_prepare: + # Cannot read between commit and durable. + readat(committs) + readat(committs+1) + readat(committs+5) def test_flcs(self): uri = "table:test_flcs02" @@ -264,7 +263,8 @@ def test_flcs(self): self.session.rollback_transaction() # This should definitely have extended the table in the present. - self.session.begin_transaction('read_timestamp=' + self.timestamp_str(50)) + read_ts = 51 if self.do_prepare else 50 + self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts)) v = cursor[appendkey2] self.assertEqual(v, 0) cursor.reset() @@ -276,7 +276,7 @@ def test_flcs(self): self.evict(uri, 1, 1) # The committed zeros should still be there. - self.session.begin_transaction('read_timestamp=' + self.timestamp_str(50)) + self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts)) v = cursor[updatekey2] self.assertEqual(v, 0) cursor.reset() @@ -284,7 +284,7 @@ def test_flcs(self): self.check_prev(cursor, updatekey2, 0) self.session.rollback_transaction() - self.session.begin_transaction('read_timestamp=' + self.timestamp_str(50)) + self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts)) v = cursor[appendkey2] self.assertEqual(v, 0) cursor.reset() diff --git a/test/suite/test_flcs03.py b/test/suite/test_flcs03.py index 30932a9e1f0..3f60a3e6735 100644 --- a/test/suite/test_flcs03.py +++ b/test/suite/test_flcs03.py @@ -205,6 +205,12 @@ def test_flcs(self): self.check_end(cursor, None, 5, append_key) self.check_end(cursor, None, 10, append_key) self.check_end(cursor, None, 19, append_key) - self.check_end(cursor, None, 20, append_key) + if self.prepare and self.op == 'deleted': + # Cannot read between commit and durable timestamps. + self.assertRaisesException(wiredtiger.WiredTigerError, + lambda: self.check_end(cursor, None, 20, append_key), '/WT_ROLLBACK/', False) + self.session.rollback_transaction() + else: + self.check_end(cursor, None, 20, append_key) self.check_end(cursor, None, 21, append_key) self.check_end(cursor, None, 30, append_key) diff --git a/test/suite/test_hs06.py b/test/suite/test_hs06.py index 42b502df4b2..67d891e3ba6 100644 --- a/test/suite/test_hs06.py +++ b/test/suite/test_hs06.py @@ -266,7 +266,14 @@ def test_hs_prepare_reads(self): prepare_session.commit_transaction( 'commit_timestamp=' + self.timestamp_str(5) + ',durable_timestamp=' + self.timestamp_str(6)) + # Cannot read between commit and durable. self.session.begin_transaction('read_timestamp=' + self.timestamp_str(5)) + for i in range(1, 11): + self.assertRaisesException(wiredtiger.WiredTigerError, + lambda: cursor[self.create_key(i)], '/WT_ROLLBACK/', False) + self.session.rollback_transaction() + + self.session.begin_transaction('read_timestamp=' + self.timestamp_str(6)) for i in range(1, 11): self.assertEquals(value2, cursor[self.create_key(i)]) self.session.rollback_transaction() From dcf4fdfe51e641ad45ae6b38a7716d56effbbf48 Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Wed, 2 Feb 2022 00:21:41 -0500 Subject: [PATCH 04/20] Fix up the RTS tests. Whole pile of these failed, but fortunately they all have pretty much the same structure and needed the same fix. --- test/suite/test_rollback_to_stable01.py | 4 +-- test/suite/test_rollback_to_stable02.py | 8 +++--- test/suite/test_rollback_to_stable03.py | 6 ++--- test/suite/test_rollback_to_stable04.py | 26 +++++++++---------- test/suite/test_rollback_to_stable06.py | 8 +++--- test/suite/test_rollback_to_stable07.py | 14 +++++----- test/suite/test_rollback_to_stable08.py | 8 +++--- test/suite/test_rollback_to_stable10.py | 34 ++++++++++++------------- test/suite/test_rollback_to_stable11.py | 4 +-- test/suite/test_rollback_to_stable12.py | 2 +- test/suite/test_rollback_to_stable13.py | 24 ++++++++--------- test/suite/test_rollback_to_stable14.py | 22 ++++++++-------- test/suite/test_rollback_to_stable18.py | 4 +-- test/suite/test_rollback_to_stable23.py | 10 ++++---- 14 files changed, 87 insertions(+), 87 deletions(-) diff --git a/test/suite/test_rollback_to_stable01.py b/test/suite/test_rollback_to_stable01.py index fc2a589e142..00af01d93bd 100755 --- a/test/suite/test_rollback_to_stable01.py +++ b/test/suite/test_rollback_to_stable01.py @@ -233,12 +233,12 @@ def test_rollback_to_stable(self): self.large_updates(uri, valuea, ds, nrows, self.prepare, 10) # Check that all updates are seen. - self.check(valuea, uri, nrows, None, 10) + self.check(valuea, uri, nrows, None, 11 if self.prepare else 10) # Remove all keys with newer timestamp. self.large_removes(uri, ds, nrows, self.prepare, 20) # Check that the no keys should be visible. - self.check(valuea, uri, 0, nrows, 20) + self.check(valuea, uri, 0, nrows, 21 if self.prepare else 20) # Pin stable to timestamp 20 if prepare otherwise 10. if self.prepare: diff --git a/test/suite/test_rollback_to_stable02.py b/test/suite/test_rollback_to_stable02.py index 47ad3ac3f00..5c1f3705950 100755 --- a/test/suite/test_rollback_to_stable02.py +++ b/test/suite/test_rollback_to_stable02.py @@ -101,19 +101,19 @@ def test_rollback_to_stable(self): self.large_updates(uri, valuea, ds, nrows, self.prepare, 10) # Check that all updates are seen. - self.check(valuea, uri, nrows, None, 10) + self.check(valuea, uri, nrows, None, 11 if self.prepare else 10) self.large_updates(uri, valueb, ds, nrows, self.prepare, 20) # Check that the new updates are only seen after the update timestamp. - self.check(valueb, uri, nrows, None, 20) + self.check(valueb, uri, nrows, None, 21 if self.prepare else 20) self.large_updates(uri, valuec, ds, nrows, self.prepare, 30) # Check that the new updates are only seen after the update timestamp. - self.check(valuec, uri, nrows, None, 30) + self.check(valuec, uri, nrows, None, 31 if self.prepare else 30) self.large_updates(uri, valued, ds, nrows, self.prepare, 40) # Check that the new updates are only seen after the update timestamp. - self.check(valued, uri, nrows, None, 40) + self.check(valued, uri, nrows, None, 41 if self.prepare else 40) # Pin stable to timestamp 30 if prepare otherwise 20. if self.prepare: diff --git a/test/suite/test_rollback_to_stable03.py b/test/suite/test_rollback_to_stable03.py index 2adbf8a0d19..dc26a55a397 100755 --- a/test/suite/test_rollback_to_stable03.py +++ b/test/suite/test_rollback_to_stable03.py @@ -89,15 +89,15 @@ def test_rollback_to_stable(self): self.large_updates(uri, valuea, ds, nrows, self.prepare, 10) # Check that all updates are seen. - self.check(valuea, uri, nrows, None, 10) + self.check(valuea, uri, nrows, None, 11 if self.prepare else 10) self.large_updates(uri, valueb, ds, nrows, self.prepare, 20) # Check that all updates are seen. - self.check(valueb, uri, nrows, None, 20) + self.check(valueb, uri, nrows, None, 21 if self.prepare else 20) self.large_updates(uri, valuec, ds, nrows, self.prepare, 30) # Check that all updates are seen. - self.check(valuec, uri, nrows, None, 30) + self.check(valuec, uri, nrows, None, 31 if self.prepare else 30) # Pin stable to timestamp 30 if prepare otherwise 20. if self.prepare: diff --git a/test/suite/test_rollback_to_stable04.py b/test/suite/test_rollback_to_stable04.py index ae445b4bff2..eecf0f30540 100755 --- a/test/suite/test_rollback_to_stable04.py +++ b/test/suite/test_rollback_to_stable04.py @@ -125,19 +125,19 @@ def test_rollback_to_stable(self): self.large_modifies(uri, 'Z', ds, 7, 1, nrows, self.prepare, 140) # Verify data is visible and correct. - self.check(value_a, uri, nrows, None, 20) - self.check(value_modQ, uri, nrows, None, 30) - self.check(value_modR, uri, nrows, None, 40) - self.check(value_modS, uri, nrows, None, 50) - self.check(value_b, uri, nrows, None, 60) - self.check(value_c, uri, nrows, None, 70) - self.check(value_modT, uri, nrows, None, 80) - self.check(value_d, uri, nrows, None, 90) - self.check(value_modW, uri, nrows, None, 100) - self.check(value_a, uri, nrows, None, 110) - self.check(value_modX, uri, nrows, None, 120) - self.check(value_modY, uri, nrows, None, 130) - self.check(value_modZ, uri, nrows, None, 140) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(value_modQ, uri, nrows, None, 31 if self.prepare else 30) + self.check(value_modR, uri, nrows, None, 41 if self.prepare else 40) + self.check(value_modS, uri, nrows, None, 51 if self.prepare else 50) + self.check(value_b, uri, nrows, None, 61 if self.prepare else 60) + self.check(value_c, uri, nrows, None, 71 if self.prepare else 70) + self.check(value_modT, uri, nrows, None, 81 if self.prepare else 80) + self.check(value_d, uri, nrows, None, 91 if self.prepare else 90) + self.check(value_modW, uri, nrows, None, 101 if self.prepare else 100) + self.check(value_a, uri, nrows, None, 111 if self.prepare else 110) + self.check(value_modX, uri, nrows, None, 121 if self.prepare else 120) + self.check(value_modY, uri, nrows, None, 131 if self.prepare else 130) + self.check(value_modZ, uri, nrows, None, 141 if self.prepare else 140) # Pin stable to timestamp 40 if prepare otherwise 30. if self.prepare: diff --git a/test/suite/test_rollback_to_stable06.py b/test/suite/test_rollback_to_stable06.py index f7fe6129487..c2507dfb55e 100755 --- a/test/suite/test_rollback_to_stable06.py +++ b/test/suite/test_rollback_to_stable06.py @@ -94,10 +94,10 @@ def test_rollback_to_stable(self): self.large_updates(uri, value_d, ds, nrows, self.prepare, 50) # Verify data is visible and correct. - self.check(value_a, uri, nrows, None, 20) - self.check(value_b, uri, nrows, None, 30) - self.check(value_c, uri, nrows, None, 40) - self.check(value_d, uri, nrows, None, 50) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(value_b, uri, nrows, None, 31 if self.prepare else 30) + self.check(value_c, uri, nrows, None, 41 if self.prepare else 40) + self.check(value_d, uri, nrows, None, 51 if self.prepare else 50) # Checkpoint to ensure the data is flushed, then rollback to the stable timestamp. if not self.in_memory: diff --git a/test/suite/test_rollback_to_stable07.py b/test/suite/test_rollback_to_stable07.py index 19cdba56446..fce71b45a7a 100755 --- a/test/suite/test_rollback_to_stable07.py +++ b/test/suite/test_rollback_to_stable07.py @@ -89,10 +89,10 @@ def test_rollback_to_stable(self): self.large_updates(uri, value_a, ds, nrows, self.prepare, 50) # Verify data is visible and correct. - self.check(value_d, uri, nrows, None, 20) - self.check(value_c, uri, nrows, None, 30) - self.check(value_b, uri, nrows, None, 40) - self.check(value_a, uri, nrows, None, 50) + self.check(value_d, uri, nrows, None, 21 if self.prepare else 20) + self.check(value_c, uri, nrows, None, 31 if self.prepare else 30) + self.check(value_b, uri, nrows, None, 41 if self.prepare else 40) + self.check(value_a, uri, nrows, None, 51 if self.prepare else 50) # Pin stable to timestamp 50 if prepare otherwise 40. if self.prepare: @@ -109,9 +109,9 @@ def test_rollback_to_stable(self): self.session.checkpoint() # Verify additional update data is visible and correct. - self.check(value_b, uri, nrows, None, 60) - self.check(value_c, uri, nrows, None, 70) - self.check(value_d, uri, nrows, None, 80) + self.check(value_b, uri, nrows, None, 61 if self.prepare else 60) + self.check(value_c, uri, nrows, None, 71 if self.prepare else 70) + self.check(value_d, uri, nrows, None, 81 if self.prepare else 80) # Simulate a server crash and restart. simulate_crash_restart(self, ".", "RESTART") diff --git a/test/suite/test_rollback_to_stable08.py b/test/suite/test_rollback_to_stable08.py index 04a303fef2b..5cf814e253b 100755 --- a/test/suite/test_rollback_to_stable08.py +++ b/test/suite/test_rollback_to_stable08.py @@ -94,10 +94,10 @@ def test_rollback_to_stable(self): self.large_updates(uri, value_d, ds, nrows, self.prepare, 50) # Verify data is visible and correct. - self.check(value_a, uri, nrows, None, 20) - self.check(value_b, uri, nrows, None, 30) - self.check(value_c, uri, nrows, None, 40) - self.check(value_d, uri, nrows, None, 50) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(value_b, uri, nrows, None, 31 if self.prepare else 30) + self.check(value_c, uri, nrows, None, 41 if self.prepare else 40) + self.check(value_d, uri, nrows, None, 51 if self.prepare else 50) # Pin stable to timestamp 60 if prepare otherwise 50. if self.prepare: diff --git a/test/suite/test_rollback_to_stable10.py b/test/suite/test_rollback_to_stable10.py index 369e387ac65..2118234feb1 100755 --- a/test/suite/test_rollback_to_stable10.py +++ b/test/suite/test_rollback_to_stable10.py @@ -106,15 +106,15 @@ def test_rollback_to_stable(self): self.large_updates(uri_2, value_a, ds_2, nrows, self.prepare, 50) # Verify data is visible and correct. - self.check(value_d, uri_1, nrows, None, 20) - self.check(value_c, uri_1, nrows, None, 30) - self.check(value_b, uri_1, nrows, None, 40) - self.check(value_a, uri_1, nrows, None, 50) + self.check(value_d, uri_1, nrows, None, 21 if self.prepare else 20) + self.check(value_c, uri_1, nrows, None, 31 if self.prepare else 30) + self.check(value_b, uri_1, nrows, None, 41 if self.prepare else 40) + self.check(value_a, uri_1, nrows, None, 51 if self.prepare else 50) - self.check(value_d, uri_2, nrows, None, 20) - self.check(value_c, uri_2, nrows, None, 30) - self.check(value_b, uri_2, nrows, None, 40) - self.check(value_a, uri_2, nrows, None, 50) + self.check(value_d, uri_2, nrows, None, 21 if self.prepare else 20) + self.check(value_c, uri_2, nrows, None, 31 if self.prepare else 30) + self.check(value_b, uri_2, nrows, None, 41 if self.prepare else 40) + self.check(value_a, uri_2, nrows, None, 51 if self.prepare else 50) # Pin stable to timestamp 60 if prepare otherwise 50. if self.prepare: @@ -236,15 +236,15 @@ def test_rollback_to_stable_prepare(self): self.large_updates(uri_2, value_a, ds_2, nrows, self.prepare, 50) # Verify data is visible and correct. - self.check(value_d, uri_1, nrows, None, 20) - self.check(value_c, uri_1, nrows, None, 30) - self.check(value_b, uri_1, nrows, None, 40) - self.check(value_a, uri_1, nrows, None, 50) - - self.check(value_d, uri_2, nrows, None, 20) - self.check(value_c, uri_2, nrows, None, 30) - self.check(value_b, uri_2, nrows, None, 40) - self.check(value_a, uri_2, nrows, None, 50) + self.check(value_d, uri_1, nrows, None, 21 if self.prepare else 20) + self.check(value_c, uri_1, nrows, None, 31 if self.prepare else 30) + self.check(value_b, uri_1, nrows, None, 41 if self.prepare else 40) + self.check(value_a, uri_1, nrows, None, 51 if self.prepare else 50) + + self.check(value_d, uri_2, nrows, None, 21 if self.prepare else 20) + self.check(value_c, uri_2, nrows, None, 31 if self.prepare else 30) + self.check(value_b, uri_2, nrows, None, 41 if self.prepare else 40) + self.check(value_a, uri_2, nrows, None, 51 if self.prepare else 50) # Pin stable to timestamp 60 if prepare otherwise 50. if self.prepare: diff --git a/test/suite/test_rollback_to_stable11.py b/test/suite/test_rollback_to_stable11.py index ed1a8e50285..7b523f88488 100755 --- a/test/suite/test_rollback_to_stable11.py +++ b/test/suite/test_rollback_to_stable11.py @@ -86,7 +86,7 @@ def test_rollback_to_stable(self): self.large_updates(uri, value_b, ds, nrows, self.prepare, 20) # Verify data is visible and correct. - self.check(value_b, uri, nrows, None, 20) + self.check(value_b, uri, nrows, None, 21 if self.prepare else 20) # Pin stable to timestamp 30 if prepare otherwise 20. if self.prepare: @@ -110,7 +110,7 @@ def test_rollback_to_stable(self): self.large_updates(uri, value_d, ds, nrows, self.prepare, 30) # Verify data is visible and correct. - self.check(value_d, uri, nrows, None, 30) + self.check(value_d, uri, nrows, None, 31 if self.prepare else 30) # Checkpoint to ensure that all the updates are flushed to disk. self.session.checkpoint() diff --git a/test/suite/test_rollback_to_stable12.py b/test/suite/test_rollback_to_stable12.py index dc25343a558..12aae476967 100755 --- a/test/suite/test_rollback_to_stable12.py +++ b/test/suite/test_rollback_to_stable12.py @@ -85,7 +85,7 @@ def test_rollback_to_stable(self): self.large_updates(uri, value_a, ds, nrows, self.prepare, 20) # Verify data is visible and correct. - self.check(value_a, uri, nrows, None, 20) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) # Pin stable to timestamp 30 if prepare otherwise 20. if self.prepare: diff --git a/test/suite/test_rollback_to_stable13.py b/test/suite/test_rollback_to_stable13.py index 9ca468d38b5..27a0e782290 100644 --- a/test/suite/test_rollback_to_stable13.py +++ b/test/suite/test_rollback_to_stable13.py @@ -85,9 +85,9 @@ def test_rollback_to_stable(self): # Verify data is visible and correct. # (In FLCS, the removed rows should read back as zero.) - self.check(value_a, uri, nrows, None, 20) - self.check(None, uri, 0, nrows, 30) - self.check(value_b, uri, nrows, None, 60) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(None, uri, 0, nrows, 31 if self.prepare else 30) + self.check(value_b, uri, nrows, None, 61 if self.prepare else 60) # Pin stable to timestamp 50 if prepare otherwise 40. if self.prepare: @@ -161,9 +161,9 @@ def test_rollback_to_stable_with_aborted_updates(self): # Verify data is visible and correct. # (In FLCS, the removed rows should read back as zero.) - self.check(value_a, uri, nrows, None, 20) - self.check(None, uri, 0, nrows, 30) - self.check(value_d, uri, nrows, None, 60) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(None, uri, 0, nrows, 31 if self.prepare else 30) + self.check(value_d, uri, nrows, None, 61 if self.prepare else 60) # Pin stable to timestamp 50 if prepare otherwise 40. if self.prepare: @@ -238,9 +238,9 @@ def test_rollback_to_stable_with_history_tombstone(self): # Verify data is visible and correct. # (In FLCS, the removed rows should read back as zero.) - self.check(value_a, uri, nrows, None, 20) - self.check(None, uri, 0, nrows, 40) - self.check(value_c, uri, nrows, None, 60) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(None, uri, 0, nrows, 41 if self.prepare else 40) + self.check(value_c, uri, nrows, None, 61 if self.prepare else 60) # Simulate a server crash and restart. simulate_crash_restart(self, ".", "RESTART") @@ -294,9 +294,9 @@ def test_rollback_to_stable_with_stable_remove(self): # Verify data is visible and correct. # (In FLCS, the removed rows should read back as zero.) - self.check(value_a, uri, nrows, None, 20) - self.check(None, uri, 0, nrows, 40) - self.check(value_c, uri, nrows, None, 60) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(None, uri, 0, nrows, 41 if self.prepare else 40) + self.check(value_c, uri, nrows, None, 61 if self.prepare else 60) self.conn.rollback_to_stable() # Perform several updates and checkpoint. diff --git a/test/suite/test_rollback_to_stable14.py b/test/suite/test_rollback_to_stable14.py index 6930b91ed07..71888becfff 100755 --- a/test/suite/test_rollback_to_stable14.py +++ b/test/suite/test_rollback_to_stable14.py @@ -98,11 +98,11 @@ def test_rollback_to_stable(self): self.large_modifies(uri, 'T', ds, 3, 1, nrows, self.prepare, 60) # Verify data is visible and correct. - self.check(value_a, uri, nrows, None, 20) - self.check(value_modQ, uri, nrows, None, 30) - self.check(value_modR, uri, nrows, None, 40) - self.check(value_modS, uri, nrows, None, 50) - self.check(value_modT, uri, nrows, None, 60) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(value_modQ, uri, nrows, None, 31 if self.prepare else 30) + self.check(value_modR, uri, nrows, None, 41 if self.prepare else 40) + self.check(value_modS, uri, nrows, None, 51 if self.prepare else 50) + self.check(value_modT, uri, nrows, None, 61 if self.prepare else 60) # Pin stable to timestamp 60 if prepare otherwise 50. if self.prepare: @@ -216,9 +216,9 @@ def test_rollback_to_stable_same_ts(self): self.large_modifies(uri, 'T', ds, 3, 1, nrows, self.prepare, 60) # Verify data is visible and correct. - self.check(value_a, uri, nrows, None, 20) - self.check(value_modQ, uri, nrows, None, 30) - self.check(value_modT, uri, nrows, None, 60) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(value_modQ, uri, nrows, None, 31 if self.prepare else 30) + self.check(value_modT, uri, nrows, None, 61 if self.prepare else 60) self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(50)) @@ -326,9 +326,9 @@ def test_rollback_to_stable_same_ts_append(self): self.large_modifies(uri, 'T', ds, len(value_modS), 1, nrows, self.prepare, 60) # Verify data is visible and correct. - self.check(value_a, uri, nrows, None, 20) - self.check(value_modQ, uri, nrows, None, 30) - self.check(value_modT, uri, nrows, None, 60) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(value_modQ, uri, nrows, None, 31 if self.prepare else 30) + self.check(value_modT, uri, nrows, None, 61 if self.prepare else 60) self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(50)) diff --git a/test/suite/test_rollback_to_stable18.py b/test/suite/test_rollback_to_stable18.py index 06bb15a4ecd..bdf6487c759 100644 --- a/test/suite/test_rollback_to_stable18.py +++ b/test/suite/test_rollback_to_stable18.py @@ -84,8 +84,8 @@ def test_rollback_to_stable(self): self.large_removes(uri, ds, nrows, self.prepare, 30) # Verify data is visible and correct. - self.check(value_a, uri, nrows, None, 20) - self.check(None, uri, 0, nrows, 30) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(None, uri, 0, nrows, 31 if self.prepare else 30) # Configure debug behavior on a cursor to evict the page positioned on when the reset API is used. evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)") diff --git a/test/suite/test_rollback_to_stable23.py b/test/suite/test_rollback_to_stable23.py index 17aeda9e37c..3487499a1f8 100644 --- a/test/suite/test_rollback_to_stable23.py +++ b/test/suite/test_rollback_to_stable23.py @@ -98,11 +98,11 @@ def test_rollback_to_stable(self): self.large_modifies(uri, 'T', ds, 3, 1, nrows, self.prepare, 60) # Verify data is visible and correct. - self.check(value_a, uri, nrows, None, 20) - self.check(value_modQ, uri, nrows, None, 30) - self.check(value_modR, uri, nrows, None, 40) - self.check(value_modS, uri, nrows, None, 50) - self.check(value_modT, uri, nrows, None, 60) + self.check(value_a, uri, nrows, None, 21 if self.prepare else 20) + self.check(value_modQ, uri, nrows, None, 31 if self.prepare else 30) + self.check(value_modR, uri, nrows, None, 41 if self.prepare else 40) + self.check(value_modS, uri, nrows, None, 51 if self.prepare else 50) + self.check(value_modT, uri, nrows, None, 61 if self.prepare else 60) # Pin stable to timestamp 60 if prepare otherwise 50. if self.prepare: From 41dc3fd48d1ef910e2ee2a024adfd5985a677958 Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Wed, 2 Feb 2022 19:56:12 -0500 Subject: [PATCH 05/20] Handle on-disk nondurable stop times as well as start times. (this makes reads of nondurable removes fail once they've been evicted) Add a specific test for this issue, that among other things makes sure that the restrictions don't interfere with RTS. --- src/include/extern.h | 4 + src/include/txn_inline.h | 79 ++++++++---- test/suite/test_durable_ts04.py | 214 ++++++++++++++++++++++++++++++++ 3 files changed, 271 insertions(+), 26 deletions(-) create mode 100644 test/suite/test_durable_ts04.py diff --git a/src/include/extern.h b/src/include/extern.h index f2f9d60eaf8..e00b3199fa1 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1961,12 +1961,16 @@ static inline bool __wt_session_can_wait(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_split_descent_race(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_ts_nondurable(WT_SESSION_IMPL *session, uint64_t txnid, + uint64_t commit_ts, uint64_t durable_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_tw_start_nondurable(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_tw_start_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_tw_stop_nondurable(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_tw_stop_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_tw_stop_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) diff --git a/src/include/txn_inline.h b/src/include/txn_inline.h index 391ae89c380..c223fac205a 100644 --- a/src/include/txn_inline.h +++ b/src/include/txn_inline.h @@ -606,6 +606,41 @@ __wt_txn_upd_value_visible_all(WT_SESSION_IMPL *session, WT_UPDATE_VALUE *upd_va __wt_txn_visible_all(session, upd_value->tw.start_txn, upd_value->tw.durable_start_ts)); } +/* + * __wt_txn_ts_nondurable -- + * Is the given timestamp not yet durable? It is durable unless we are reading between the + * commit time the durable time *and* the global stable timestamp has not yet advanced past the + * durable time. (Note that it is not sufficient to just check that the global stable timestamp + * has advanced past the commit time; prepared transactions are allowed to commit earlier than + * stable if they were prepared after stable and stable has since moved up.) Assumes caller has + * already checked that the commit time is visible and skips checks that makes redundant. + */ +static inline bool +__wt_txn_ts_nondurable( + WT_SESSION_IMPL *session, uint64_t txnid, uint64_t commit_ts, uint64_t durable_ts) +{ + WT_TXN_GLOBAL *txn_global; + + txn_global = &S2C(session)->txn_global; + + /* If there is no gap between commit time and durable time, we can't read between them. */ + if (durable_ts == commit_ts) + return (false); + + /* + * If the durable time is stable, the value might or might not actually be durable (depending on + * whether a checkpoint has been completed) but we (the caller) cannot become durable before it, + * so for us it counts as durable. The purpose of this check is to make sure we can't read a + * nondurable value, then commit and become durable before it, because that violates consistency + * expectations. + */ + if (durable_ts <= txn_global->stable_timestamp) + return (false); + + /* Otherwise, if the durable time is not visible, we are reading before it. */ + return (!__wt_txn_visible(session, txnid, durable_ts)); +} + /* * __wt_txn_tw_stop_visible -- * Is the given stop time window visible? @@ -617,6 +652,18 @@ __wt_txn_tw_stop_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) __wt_txn_visible(session, tw->stop_txn, tw->stop_ts)); } +/* + * __wt_txn_tw_stop_nondurable -- + * Is the given stop time window not yet durable? See notes about the start time below. Assumes + * the caller has already checked that the stop time is visible, so among other things we know + * that the time window *does* have a stop time. + */ +static inline bool +__wt_txn_tw_stop_nondurable(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) +{ + return (__wt_txn_ts_nondurable(session, tw->stop_txn, tw->stop_ts, tw->durable_stop_ts)); +} + /* * __wt_txn_tw_start_visible -- * Is the given start time window visible? @@ -637,36 +684,13 @@ __wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) /* * __wt_txn_tw_start_nondurable -- - * Is the given start time window not yet durable? It is durable unless we are reading between - * its start time and its durable start time *and* the global stable timestamp has not yet - * advanced past the durable start time. (Note that it is not sufficient to just check that the - * global stable timestamp has advanced past the start time; prepared transactions are allowed - * to commit earlier than stable if they were prepared after stable and stable has since moved - * up.) Assumes caller has already checked that the start time is visible. + * Is the given start time window not yet durable? Assumes caller has already checked that the + * start time is visible. */ static inline bool __wt_txn_tw_start_nondurable(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) { - WT_TXN_GLOBAL *txn_global; - - txn_global = &S2C(session)->txn_global; - - /* If there is no gap between start time and durable start time, we can't read between them. */ - if (tw->durable_start_ts == tw->start_ts) - return (false); - - /* - * If the durable start time is stable, the value might or might not actually be durable - * (depending on whether a checkpoint has been completed) but we cannot become durable *before* - * it, so for us it counts as durable. The purpose of this check is to make sure we can't read a - * nondurable value, then commit and become durable before it, because that violates consistency - * expectations. - */ - if (tw->durable_start_ts <= txn_global->stable_timestamp) - return (false); - - /* Otherwise, if the durable start time is not visible, we are reading before it. */ - return (!__wt_txn_visible(session, tw->start_txn, tw->durable_start_ts)); + return (__wt_txn_ts_nondurable(session, tw->start_txn, tw->start_ts, tw->durable_start_ts)); } /* @@ -1073,6 +1097,9 @@ __wt_txn_read( */ if (!have_stop_tw && __wt_txn_tw_stop_visible(session, &tw) && !F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE)) { + if (__wt_txn_tw_stop_nondurable(session, &tw)) + /* Disallow reading if we can see it but it isn't durable yet. */ + return (__wt_txn_rollback_required(session, WT_TXN_ROLLBACK_REASON_NONDURABLE)); cbt->upd_value->buf.data = NULL; cbt->upd_value->buf.size = 0; cbt->upd_value->tw.durable_stop_ts = tw.durable_stop_ts; diff --git a/test/suite/test_durable_ts04.py b/test/suite/test_durable_ts04.py new file mode 100644 index 00000000000..ab28385db6a --- /dev/null +++ b/test/suite/test_durable_ts04.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# +# Public Domain 2014-present MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +from helper import simulate_crash_restart +import wiredtiger, wttest +from wiredtiger import WT_NOTFOUND +from wtscenario import make_scenarios + +# test_durable_ts04.py +# +# Prepared transactions can have a durable timestamp that's greater than the commit +# timestamp. This means that the committed data becomes visible before it's durable, +# which opens a window for someone to read it and then commit and become durable +# before it, which in turn leads to inconsistency because a crash will preserve the +# second transaction but not the first. +# +# To avoid this problem, we prohibit reads between the commit time and the durable +# time, until the global timestamp reaches the durable time. +# +# This test makes sure that mechanism works as intended. +# 1. Reading nondurable data produces WT_ROLLBACK. +# 2. Advancing stable to the durable timestamp eliminates this behavior. +# 3. The behavior applies to both updates and removes. +# 4. The behavior applies to pages that have been evicted as well as to in-memory updates. +# 5. Rollback-to-stable can successfully roll back the transaction before it becomes stable. +# +# Additionally there are two ways to set up the scenario; one involves committing +# before stable. (This is explicitly permitted if stable has moved up since prepare, as +# long as the prepare happend after stable.) + +class test_durable_ts04(wttest.WiredTigerTestCase): + conn_config = 'cache_size=10MB' + + format_values = [ + ('integer-row', dict(key_format='i', value_format='S')), + ('column', dict(key_format='r', value_format='S')), + ('column-fix', dict(key_format='r', value_format='8t')), + ] + commit_values = [ + ('commit_before_stable', dict(commit_before_stable=True)), + ('commit_after_stable', dict(commit_before_stable=False)), + ] + remove_values = [ + ('update', dict(do_remove=False)), + ('remove', dict(do_remove=True)), + ] + evict_values = [ + ('noevict', dict(do_evict=False)), + ('evict', dict(do_evict=True)), + ] + checkpoint_values = [ + ('nocheckpoint', dict(do_checkpoint=False)), + ('checkpoint', dict(do_checkpoint=True)), + ] + op_values = [ + ('crash', dict(op='crash')), + ('rollback', dict(op='rollback')), + ('stabilize', dict(op='stabilize')), + ] + scenarios = make_scenarios(format_values, + commit_values, remove_values, evict_values, checkpoint_values, op_values) + + Deleted = 1234567 # Choose this to be different from any legal value. + + def check_range(self, uri, lo, hi, value, read_ts): + cursor = self.session.open_cursor(uri) + if value is None: + for i in range(lo, hi): + self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts)) + self.assertRaisesException(wiredtiger.WiredTigerError, + lambda: cursor[i], '/WT_ROLLBACK/', False) + self.session.rollback_transaction() + else: + self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts)) + for i in range(lo, hi): + if value == self.Deleted: + cursor.set_key(i) + self.assertEqual(cursor.search(), WT_NOTFOUND) + else: + self.assertEqual(cursor[i], value) + self.session.rollback_transaction() + cursor.close() + + def check(self, uri, nrows, low_value, high_value, read_ts): + self.check_range(uri, 1, nrows // 2, low_value, read_ts) + self.check_range(uri, nrows // 2, nrows + 1, high_value, read_ts) + + def evict(self, uri, lo, hi, value, read_ts): + evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)") + self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts)) + for k in range(lo, hi): + v = evict_cursor[k] + self.assertEqual(v, value) + self.assertEqual(evict_cursor.reset(), 0) + self.session.rollback_transaction() + + def test_durable_ts04(self): + # Create a table. + uri = 'table:test_durable_ts04' + nrows = 10 + self.session.create( + uri, 'key_format={},value_format={}'.format(self.key_format, self.value_format)) + if self.value_format == '8t': + value_a = 97 + value_b = 0 if self.do_remove else 98 + else: + value_a = "aaaaa" * 100 + value_b = self.Deleted if self.do_remove else "bbbbb" * 100 + + cursor = self.session.open_cursor(uri) + + # Write some initial baseline data at time 5; make it stable and checkpoint it. + self.session.begin_transaction() + for k in range(1, nrows + 1): + cursor[k] = value_a + self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(5)) + self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(5) + \ + ',oldest_timestamp=' + self.timestamp_str(5)) + self.session.checkpoint() + + # Prepare a transaction and commit it with a later durable time. + # Prepare at 10, commit at 20, durable will be 50. Move stable to 30, either + # before or after the commit. + self.session.begin_transaction() + for k in range(1, nrows // 2): + cursor.set_key(k) + if self.do_remove: + self.assertEqual(cursor.remove(), 0) + else: + cursor.set_value(value_b) + self.assertEqual(cursor.update(), 0) + self.session.prepare_transaction('prepare_timestamp=' + self.timestamp_str(10)) + if self.commit_before_stable: + self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(30)) + self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20) + + ',durable_timestamp=' + self.timestamp_str(50)) + if not self.commit_before_stable: + self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(30)) + + cursor.close() + + # Optionally evict. Have the eviction cursor read at 10 to avoid touching the + # nondurable data. + if self.do_evict: + self.evict(uri, 1, nrows, value_a, 10) + + # Now try reading at 25 and 40. This should fail in all variants. + # (25 is after commit, before stable, and before durable; 40 is after commit + # and stable, and before durable.) + self.check(uri, nrows, None, value_a, 25) + self.check(uri, nrows, None, value_a, 40) + + # We should be able to read at 50. + self.check(uri, nrows, value_b, value_a, 50) + + # Optionally checkpoint. + if self.do_checkpoint: + self.session.checkpoint() + + # Now either crash, roll back explicitly, or move stable forward. + if self.op == 'crash' or self.op == 'rollback': + if self.op == 'crash': + simulate_crash_restart(self, ".", "RESTART") + else: + self.conn.rollback_to_stable() + + # The transaction should disappear, because it isn't stable yet, and it + # should do so without any noise or failures caused by the nondurable checks. + self.check(uri, nrows, value_a, value_a, 15) + self.check(uri, nrows, value_a, value_a, 25) + self.check(uri, nrows, value_a, value_a, 40) + self.check(uri, nrows, value_a, value_a, 50) + else: + # First, move stable to 49 and check we still can't read the nondurable values. + self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(49)) + self.check(uri, nrows, None, value_a, 25) + self.check(uri, nrows, None, value_a, 40) + self.check(uri, nrows, value_b, value_a, 50) + + # Now, set it to 50 and we should be able to read the previously nondurable values. + self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(50)) + self.check(uri, nrows, value_a, value_a, 15) + self.check(uri, nrows, value_b, value_a, 25) + self.check(uri, nrows, value_b, value_a, 30) + self.check(uri, nrows, value_b, value_a, 40) + self.check(uri, nrows, value_b, value_a, 50) + +if __name__ == '__main__': + wttest.run() From 7854c8f9c3fd2adc157c0f4d3fd9a8640dfc99e7 Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Thu, 3 Feb 2022 18:28:48 -0500 Subject: [PATCH 06/20] Resort enum and fix incomprehensible comment. --- src/include/txn.h | 4 ++-- src/include/txn_inline.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/include/txn.h b/src/include/txn.h index 56e43260607..864117c7042 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -44,8 +44,8 @@ typedef enum { WT_VISIBLE_FALSE = 0, /* Not a visible update */ - WT_VISIBLE_PREPARE = 1, /* Prepared update */ - WT_VISIBLE_NONDURABLE = 2, /* Committed but not durable update */ + WT_VISIBLE_NONDURABLE = 1, /* Committed but not durable update */ + WT_VISIBLE_PREPARE = 2, /* Prepared update */ WT_VISIBLE_TRUE = 3 /* A visible update */ } WT_VISIBLE_TYPE; diff --git a/src/include/txn_inline.h b/src/include/txn_inline.h index c223fac205a..1106275cd07 100644 --- a/src/include/txn_inline.h +++ b/src/include/txn_inline.h @@ -613,7 +613,7 @@ __wt_txn_upd_value_visible_all(WT_SESSION_IMPL *session, WT_UPDATE_VALUE *upd_va * durable time. (Note that it is not sufficient to just check that the global stable timestamp * has advanced past the commit time; prepared transactions are allowed to commit earlier than * stable if they were prepared after stable and stable has since moved up.) Assumes caller has - * already checked that the commit time is visible and skips checks that makes redundant. + * already checked that the commit time is visible, so does not repeat checks in that code. */ static inline bool __wt_txn_ts_nondurable( From 7f6502282dba9f2a1508857c890ab5dd4423f142 Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Thu, 3 Feb 2022 19:27:31 -0500 Subject: [PATCH 07/20] Update the docs. Add a warning about the cases we still can't detect. --- src/docs/timestamp-prepare.dox | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/docs/timestamp-prepare.dox b/src/docs/timestamp-prepare.dox index b31a3efc3de..9f188278ea0 100644 --- a/src/docs/timestamp-prepare.dox +++ b/src/docs/timestamp-prepare.dox @@ -49,6 +49,24 @@ transactions are higher-level MongoDB operations, requiring cluster-level consensus on durability. Applications without similar requirements for prepared transactions should set the durable and commit timestamps to the same time. +When a transaction has a durable timestamp later than its commit timestamp, +reading its writes in a second transaction and then committing such that the +second transaction becomes durable before the first can produce data +inconsistency. To help prevent this, reads between the commit and durable +timestamp will fail with ::WT_ROLLBACK if the global stable timestamp has not +yet advanced to the durable timestamp. Retrying such transactions will +eventually succeed as long as either they are retried at a later timestamp or +the stable timestamp continues to move forward. + +\warning It is also possible to create data inconsistency by reading such a +transaction's writes after its durable timestamp and committing without a +timestamp, if the global stable timestamp has not yet reached the durable +timestamp. (This can involve either reading with a read timestamp after the +durable timestamp, or reading with no read timestamp, as the latter reads the +latest committed values.) Applications using the delayed durable timestamp +feature should be cautious about committing with no write timestamp to ensure +that conflicts of this type cannot arise. + Prepared transactions have their own configuration keyword for rounding timestamps; see @ref timestamp_roundup for more information. From c18971da18dd3440f22f962b82c3d4f23d11927c Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Tue, 8 Feb 2022 17:56:10 -0500 Subject: [PATCH 08/20] Switch to returning WT_PREPARE_CONFLICT for this instead of WT_ROLLBACK. --- dist/api_err.py | 3 ++- src/docs/arch-timestamp.dox | 3 +++ src/docs/error-handling.dox | 3 ++- src/docs/timestamp-prepare.dox | 2 +- src/include/txn.h | 1 - src/include/txn_inline.h | 9 ++++++--- src/include/wiredtiger.in | 3 ++- test/suite/test_durable_ts03.py | 4 ++-- test/suite/test_durable_ts04.py | 6 +++--- test/suite/test_flcs03.py | 5 +++-- test/suite/test_hs06.py | 4 ++-- 11 files changed, 26 insertions(+), 17 deletions(-) diff --git a/dist/api_err.py b/dist/api_err.py index dea96d25dc2..9b2a6c6d026 100644 --- a/dist/api_err.py +++ b/dist/api_err.py @@ -63,7 +63,8 @@ def __init__(self, name, value, desc, long_desc=None, **flags): 'conflict with a prepared update', ''' This error is generated when the application attempts to read an updated record which is part of a transaction that has been prepared - but not yet resolved.'''), + but not yet resolved, or part of a transaction that has been prepared + and committed but is not yet durable.'''), Error('WT_TRY_SALVAGE', -31809, 'database corruption detected', ''' This error is generated when corruption is detected in an on-disk file. diff --git a/src/docs/arch-timestamp.dox b/src/docs/arch-timestamp.dox index f213633d422..a2120fbd09e 100644 --- a/src/docs/arch-timestamp.dox +++ b/src/docs/arch-timestamp.dox @@ -273,6 +273,9 @@ commits. Reads between the prepare timestamp and commit timestamp of a transaction that has been prepared but not committed fail with ::WT_PREPARE_CONFLICT. +So do reads between the commit timestamp and durable timestamp of a +transaction that has been committed when the stable timestamp has not +yet reached the durable timestamp. See @ref arch-transaction-prepare for further discussion of prepared transactions. diff --git a/src/docs/error-handling.dox b/src/docs/error-handling.dox index 1c8578d2814..3e552181430 100644 --- a/src/docs/error-handling.dox +++ b/src/docs/error-handling.dox @@ -57,7 +57,8 @@ is in progress, it should be rolled back and the operation retried in a new tran @par WT_PREPARE_CONFLICT This error is generated when the application attempts to read an updated record which is part of a -transaction that has been prepared but not yet resolved. +transaction that has been prepared but not yet resolved, or part of a transaction that has been +prepared and committed but is not yet durable. @par WT_TRY_SALVAGE This error is generated when corruption is detected in an on-disk file. During normal operations, diff --git a/src/docs/timestamp-prepare.dox b/src/docs/timestamp-prepare.dox index 9f188278ea0..e1ce819542f 100644 --- a/src/docs/timestamp-prepare.dox +++ b/src/docs/timestamp-prepare.dox @@ -53,7 +53,7 @@ When a transaction has a durable timestamp later than its commit timestamp, reading its writes in a second transaction and then committing such that the second transaction becomes durable before the first can produce data inconsistency. To help prevent this, reads between the commit and durable -timestamp will fail with ::WT_ROLLBACK if the global stable timestamp has not +timestamp will fail with ::WT_PREPARE_CONFLICT if the global stable timestamp has not yet advanced to the durable timestamp. Retrying such transactions will eventually succeed as long as either they are retried at a later timestamp or the stable timestamp continues to move forward. diff --git a/src/include/txn.h b/src/include/txn.h index 225712edbbc..452b061f736 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -21,7 +21,6 @@ */ #define WT_TXN_ROLLBACK_REASON_CACHE "oldest pinned transaction ID rolled back for eviction" #define WT_TXN_ROLLBACK_REASON_CONFLICT "conflict between concurrent operations" -#define WT_TXN_ROLLBACK_REASON_NONDURABLE "read conflict with committed but non-durable value" /* AUTOMATIC FLAG VALUE GENERATION START 0 */ #define WT_TXN_LOG_CKPT_CLEANUP 0x01u diff --git a/src/include/txn_inline.h b/src/include/txn_inline.h index 2a2b3cf9e9b..817dcf9ff7a 100644 --- a/src/include/txn_inline.h +++ b/src/include/txn_inline.h @@ -988,7 +988,8 @@ __wt_txn_read_upd_list_internal(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, break; if (upd_visible == WT_VISIBLE_NONDURABLE) - return (__wt_txn_rollback_required(session, WT_TXN_ROLLBACK_REASON_NONDURABLE)); + WT_RET_MSG( + session, WT_PREPARE_CONFLICT, "read conflict with committed but non-durable value"); /* * Save the prepared update to help us detect if we race with prepared commit or rollback @@ -1107,7 +1108,8 @@ __wt_txn_read( !F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE)) { if (__wt_txn_tw_stop_nondurable(session, &tw)) /* Disallow reading if we can see it but it isn't durable yet. */ - return (__wt_txn_rollback_required(session, WT_TXN_ROLLBACK_REASON_NONDURABLE)); + WT_RET_MSG(session, WT_PREPARE_CONFLICT, + "read conflict with committed but non-durable value"); cbt->upd_value->buf.data = NULL; cbt->upd_value->buf.size = 0; cbt->upd_value->tw.durable_stop_ts = tw.durable_stop_ts; @@ -1136,7 +1138,8 @@ __wt_txn_read( else if (__wt_txn_tw_start_visible(session, &tw)) { if (__wt_txn_tw_start_nondurable(session, &tw)) /* Disallow reading if we can see it but it isn't durable yet. */ - return (__wt_txn_rollback_required(session, WT_TXN_ROLLBACK_REASON_NONDURABLE)); + WT_RET_MSG(session, WT_PREPARE_CONFLICT, + "read conflict with committed but non-durable value"); return_onpage_value = true; } diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 3d0fbcd16c1..a455b3a5d88 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -3831,7 +3831,8 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp) * Conflict with a prepared update. * This error is generated when the application attempts to read an updated * record which is part of a transaction that has been prepared but not yet - * resolved. + * resolved, or part of a transaction that has been prepared and committed but + * is not yet durable. */ #define WT_PREPARE_CONFLICT (-31808) /*! diff --git a/test/suite/test_durable_ts03.py b/test/suite/test_durable_ts03.py index 7aedb40f8c8..243664b0295 100755 --- a/test/suite/test_durable_ts03.py +++ b/test/suite/test_durable_ts03.py @@ -101,8 +101,8 @@ def test_durable_ts03(self): self.assertEquals(cursor.reset(), 0) session.begin_transaction('read_timestamp=' + self.timestamp_str(210)) cursor.set_key(1) - self.assertRaisesException(wiredtiger.WiredTigerError, - lambda: cursor.search(), '/WT_ROLLBACK/', False) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: cursor.search(), '/committed but non-durable value/') session.rollback_transaction() # Read the updated data to confirm that it is visible. diff --git a/test/suite/test_durable_ts04.py b/test/suite/test_durable_ts04.py index ab28385db6a..1d5791fae13 100644 --- a/test/suite/test_durable_ts04.py +++ b/test/suite/test_durable_ts04.py @@ -43,7 +43,7 @@ # time, until the global timestamp reaches the durable time. # # This test makes sure that mechanism works as intended. -# 1. Reading nondurable data produces WT_ROLLBACK. +# 1. Reading nondurable data produces WT_PREPARE_CONFLICT. # 2. Advancing stable to the durable timestamp eliminates this behavior. # 3. The behavior applies to both updates and removes. # 4. The behavior applies to pages that have been evicted as well as to in-memory updates. @@ -92,8 +92,8 @@ def check_range(self, uri, lo, hi, value, read_ts): if value is None: for i in range(lo, hi): self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts)) - self.assertRaisesException(wiredtiger.WiredTigerError, - lambda: cursor[i], '/WT_ROLLBACK/', False) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: cursor[i], '/committed but non-durable value/') self.session.rollback_transaction() else: self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts)) diff --git a/test/suite/test_flcs03.py b/test/suite/test_flcs03.py index 3f60a3e6735..3f648530cf3 100644 --- a/test/suite/test_flcs03.py +++ b/test/suite/test_flcs03.py @@ -207,8 +207,9 @@ def test_flcs(self): self.check_end(cursor, None, 19, append_key) if self.prepare and self.op == 'deleted': # Cannot read between commit and durable timestamps. - self.assertRaisesException(wiredtiger.WiredTigerError, - lambda: self.check_end(cursor, None, 20, append_key), '/WT_ROLLBACK/', False) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.check_end(cursor, None, 20, append_key), + '/committed but non-durable value/') self.session.rollback_transaction() else: self.check_end(cursor, None, 20, append_key) diff --git a/test/suite/test_hs06.py b/test/suite/test_hs06.py index 67d891e3ba6..d57a6dc4c5c 100644 --- a/test/suite/test_hs06.py +++ b/test/suite/test_hs06.py @@ -269,8 +269,8 @@ def test_hs_prepare_reads(self): # Cannot read between commit and durable. self.session.begin_transaction('read_timestamp=' + self.timestamp_str(5)) for i in range(1, 11): - self.assertRaisesException(wiredtiger.WiredTigerError, - lambda: cursor[self.create_key(i)], '/WT_ROLLBACK/', False) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: cursor[self.create_key(i)], '/committed but non-durable value/') self.session.rollback_transaction() self.session.begin_transaction('read_timestamp=' + self.timestamp_str(6)) From 4cd48188107b3d81f4b6d5db4beedf5d25fc2ff2 Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Wed, 16 Feb 2022 00:11:55 -0500 Subject: [PATCH 09/20] Honor ignore_prepare=true. Test that it works. --- src/include/txn_inline.h | 17 ++++++++++++----- test/suite/test_durable_ts04.py | 20 ++++++++++++++------ 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/include/txn_inline.h b/src/include/txn_inline.h index 817dcf9ff7a..dfe73e99d3e 100644 --- a/src/include/txn_inline.h +++ b/src/include/txn_inline.h @@ -987,9 +987,14 @@ __wt_txn_read_upd_list_internal(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (upd_visible == WT_VISIBLE_TRUE) break; - if (upd_visible == WT_VISIBLE_NONDURABLE) - WT_RET_MSG( - session, WT_PREPARE_CONFLICT, "read conflict with committed but non-durable value"); + if (upd_visible == WT_VISIBLE_NONDURABLE) { + if (F_ISSET(session->txn, WT_TXN_IGNORE_PREPARE)) + /* With ignore_prepare, nondurable values are fully visible. */ + break; + else + WT_RET_MSG(session, WT_PREPARE_CONFLICT, + "read conflict with committed but non-durable value"); + } /* * Save the prepared update to help us detect if we race with prepared commit or rollback @@ -1106,7 +1111,8 @@ __wt_txn_read( */ if (!have_stop_tw && __wt_txn_tw_stop_visible(session, &tw) && !F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE)) { - if (__wt_txn_tw_stop_nondurable(session, &tw)) + if (__wt_txn_tw_stop_nondurable(session, &tw) && + !F_ISSET(session->txn, WT_TXN_IGNORE_PREPARE)) /* Disallow reading if we can see it but it isn't durable yet. */ WT_RET_MSG(session, WT_PREPARE_CONFLICT, "read conflict with committed but non-durable value"); @@ -1136,7 +1142,8 @@ __wt_txn_read( if (WT_IS_HS(session->dhandle)) return_onpage_value = true; else if (__wt_txn_tw_start_visible(session, &tw)) { - if (__wt_txn_tw_start_nondurable(session, &tw)) + if (__wt_txn_tw_start_nondurable(session, &tw) && + !F_ISSET(session->txn, WT_TXN_IGNORE_PREPARE)) /* Disallow reading if we can see it but it isn't durable yet. */ WT_RET_MSG(session, WT_PREPARE_CONFLICT, "read conflict with committed but non-durable value"); diff --git a/test/suite/test_durable_ts04.py b/test/suite/test_durable_ts04.py index 1d5791fae13..66149b53848 100644 --- a/test/suite/test_durable_ts04.py +++ b/test/suite/test_durable_ts04.py @@ -48,6 +48,7 @@ # 3. The behavior applies to both updates and removes. # 4. The behavior applies to pages that have been evicted as well as to in-memory updates. # 5. Rollback-to-stable can successfully roll back the transaction before it becomes stable. +# 6. ignore_prepare=true disables the check. # # Additionally there are two ways to set up the scenario; one involves committing # before stable. (This is explicitly permitted if stable has moved up since prepare, as @@ -82,8 +83,13 @@ class test_durable_ts04(wttest.WiredTigerTestCase): ('rollback', dict(op='rollback')), ('stabilize', dict(op='stabilize')), ] + ignoreprepare_values = [ + ('no_ignore_prepare', dict(ignore_prepare=False)), + ('ignore_prepare', dict(ignore_prepare=True)), + ] scenarios = make_scenarios(format_values, - commit_values, remove_values, evict_values, checkpoint_values, op_values) + commit_values, remove_values, evict_values, checkpoint_values, op_values, + ignoreprepare_values) Deleted = 1234567 # Choose this to be different from any legal value. @@ -96,7 +102,8 @@ def check_range(self, uri, lo, hi, value, read_ts): lambda: cursor[i], '/committed but non-durable value/') self.session.rollback_transaction() else: - self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts)) + ign = ',ignore_prepare=true' if self.ignore_prepare else '' + self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts) + ign) for i in range(lo, hi): if value == self.Deleted: cursor.set_key(i) @@ -172,8 +179,9 @@ def test_durable_ts04(self): # Now try reading at 25 and 40. This should fail in all variants. # (25 is after commit, before stable, and before durable; 40 is after commit # and stable, and before durable.) - self.check(uri, nrows, None, value_a, 25) - self.check(uri, nrows, None, value_a, 40) + # If ignore_prepare is set, we should see the transaction anyway and get value_b. + self.check(uri, nrows, value_b if self.ignore_prepare else None, value_a, 25) + self.check(uri, nrows, value_b if self.ignore_prepare else None, value_a, 40) # We should be able to read at 50. self.check(uri, nrows, value_b, value_a, 50) @@ -198,8 +206,8 @@ def test_durable_ts04(self): else: # First, move stable to 49 and check we still can't read the nondurable values. self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(49)) - self.check(uri, nrows, None, value_a, 25) - self.check(uri, nrows, None, value_a, 40) + self.check(uri, nrows, value_b if self.ignore_prepare else None, value_a, 25) + self.check(uri, nrows, value_b if self.ignore_prepare else None, value_a, 40) self.check(uri, nrows, value_b, value_a, 50) # Now, set it to 50 and we should be able to read the previously nondurable values. From 9af2e1f7b841589cbf1f7acb0384621c3eaa1066 Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Tue, 22 Feb 2022 21:35:44 -0500 Subject: [PATCH 10/20] Fix tests that failed in the last test run, for posterity. --- test/suite/test_durable_ts03.py | 1 + test/suite/test_flcs03.py | 9 +-------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/test/suite/test_durable_ts03.py b/test/suite/test_durable_ts03.py index 2e9c09f7bae..cf9abd5bc49 100755 --- a/test/suite/test_durable_ts03.py +++ b/test/suite/test_durable_ts03.py @@ -27,6 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. import wttest +import wiredtiger from wtscenario import make_scenarios # test_durable_ts03.py diff --git a/test/suite/test_flcs03.py b/test/suite/test_flcs03.py index 3f648530cf3..30932a9e1f0 100644 --- a/test/suite/test_flcs03.py +++ b/test/suite/test_flcs03.py @@ -205,13 +205,6 @@ def test_flcs(self): self.check_end(cursor, None, 5, append_key) self.check_end(cursor, None, 10, append_key) self.check_end(cursor, None, 19, append_key) - if self.prepare and self.op == 'deleted': - # Cannot read between commit and durable timestamps. - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.check_end(cursor, None, 20, append_key), - '/committed but non-durable value/') - self.session.rollback_transaction() - else: - self.check_end(cursor, None, 20, append_key) + self.check_end(cursor, None, 20, append_key) self.check_end(cursor, None, 21, append_key) self.check_end(cursor, None, 30, append_key) From b5a8b4476f1fca0b386bffec87df14e63cd70f89 Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Wed, 23 Feb 2022 22:24:02 -0500 Subject: [PATCH 11/20] Take a different approach for WT-8747. Instead of prohibiting reads of nondurable data, track the durable timestamps of data read by a transaction and prohibit committing with a durable timestamp (or commit timestamp for non-prepared transactions) earlier than that. Keep the test changes; most of them are about only reading nondurable data when we explicitly mean to. It seems best to avoid doing that otherwise. --- dist/api_err.py | 3 +- src/docs/arch-timestamp.dox | 15 ++++- src/docs/error-handling.dox | 3 +- src/docs/timestamp-prepare.dox | 13 ++-- src/include/extern.h | 6 -- src/include/txn.h | 12 ++-- src/include/txn_inline.h | 113 +++++--------------------------- src/include/wiredtiger.in | 3 +- src/txn/txn.c | 1 + src/txn/txn_ckpt.c | 5 ++ src/txn/txn_timestamp.c | 8 +++ test/suite/test_durable_ts03.py | 7 +- test/suite/test_hs06.py | 5 +- 13 files changed, 67 insertions(+), 127 deletions(-) diff --git a/dist/api_err.py b/dist/api_err.py index 9b2a6c6d026..dea96d25dc2 100644 --- a/dist/api_err.py +++ b/dist/api_err.py @@ -63,8 +63,7 @@ def __init__(self, name, value, desc, long_desc=None, **flags): 'conflict with a prepared update', ''' This error is generated when the application attempts to read an updated record which is part of a transaction that has been prepared - but not yet resolved, or part of a transaction that has been prepared - and committed but is not yet durable.'''), + but not yet resolved.'''), Error('WT_TRY_SALVAGE', -31809, 'database corruption detected', ''' This error is generated when corruption is detected in an on-disk file. diff --git a/src/docs/arch-timestamp.dox b/src/docs/arch-timestamp.dox index f1f668f5649..2ea6fb00bfa 100644 --- a/src/docs/arch-timestamp.dox +++ b/src/docs/arch-timestamp.dox @@ -288,9 +288,18 @@ commits. Reads between the prepare timestamp and commit timestamp of a transaction that has been prepared but not committed fail with ::WT_PREPARE_CONFLICT. -So do reads between the commit timestamp and durable timestamp of a -transaction that has been committed when the stable timestamp has not -yet reached the durable timestamp. + +Reads between the commit timestamp and durable timestamp of a +transaction that has been committed but is not yet stable (that is, +the stable timestamp is at or past the commit timestamp but has not +yet advanced to the durable timestamp) are potentially unsafe. +If a second transaction performs such a read and then commits with an +earlier durable timestamp, and a checkpoint includes the second +transaction's stable timestamp but not the first, that checkpoint then +contains inconsistent data. +To avoid this inconsistency, transactions that perform unsafe reads of +this type are restricted at commit time to durable timestamps that are +no less than the durable timestamps of the data they read. See @ref arch-transaction-prepare for further discussion of prepared transactions. diff --git a/src/docs/error-handling.dox b/src/docs/error-handling.dox index 3e552181430..1c8578d2814 100644 --- a/src/docs/error-handling.dox +++ b/src/docs/error-handling.dox @@ -57,8 +57,7 @@ is in progress, it should be rolled back and the operation retried in a new tran @par WT_PREPARE_CONFLICT This error is generated when the application attempts to read an updated record which is part of a -transaction that has been prepared but not yet resolved, or part of a transaction that has been -prepared and committed but is not yet durable. +transaction that has been prepared but not yet resolved. @par WT_TRY_SALVAGE This error is generated when corruption is detected in an on-disk file. During normal operations, diff --git a/src/docs/timestamp-prepare.dox b/src/docs/timestamp-prepare.dox index 3dbf9e12885..fa57ae0c600 100644 --- a/src/docs/timestamp-prepare.dox +++ b/src/docs/timestamp-prepare.dox @@ -50,14 +50,15 @@ transactions should set the durable and commit timestamps to the same time. When a transaction has a durable timestamp later than its commit timestamp, reading its writes in a second transaction and then committing such that the second transaction becomes durable before the first can produce data -inconsistency. To help prevent this, reads between the commit and durable -timestamp will fail with ::WT_PREPARE_CONFLICT if the global stable timestamp has not -yet advanced to the durable timestamp. Retrying such transactions will -eventually succeed as long as either they are retried at a later timestamp or -the stable timestamp continues to move forward. +inconsistency. Such commits are prohibited. +Applications that create gaps between their commit timestamps and +durable timestamps are responsible for either not reading in those +gaps, or coordinating the durable timestamps of their commits to +prevent this scenario. \warning It is also possible to create data inconsistency by reading such a -transaction's writes after its durable timestamp and committing without a +transaction's writes after its durable timestamp (including by +reading with no read timestamp) and committing without a timestamp, if the global stable timestamp has not yet reached the durable timestamp. (This can involve either reading with a read timestamp after the durable timestamp, or reading with no read timestamp, as the latter reads the diff --git a/src/include/extern.h b/src/include/extern.h index b9127d4f508..3cf657c87f9 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1957,16 +1957,10 @@ static inline bool __wt_session_can_wait(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_split_descent_race(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_txn_ts_nondurable(WT_SESSION_IMPL *session, uint64_t txnid, - uint64_t commit_ts, uint64_t durable_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_txn_tw_start_nondurable(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_tw_start_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_txn_tw_stop_nondurable(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_tw_stop_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_tw_stop_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) diff --git a/src/include/txn.h b/src/include/txn.h index 1872d60e6ca..06fea607090 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -42,10 +42,9 @@ /* AUTOMATIC FLAG VALUE GENERATION STOP 32 */ typedef enum { - WT_VISIBLE_FALSE = 0, /* Not a visible update */ - WT_VISIBLE_NONDURABLE = 1, /* Committed but not durable update */ - WT_VISIBLE_PREPARE = 2, /* Prepared update */ - WT_VISIBLE_TRUE = 3 /* A visible update */ + WT_VISIBLE_FALSE = 0, /* Not a visible update */ + WT_VISIBLE_PREPARE = 1, /* Prepared update */ + WT_VISIBLE_TRUE = 2 /* A visible update */ } WT_VISIBLE_TYPE; /* @@ -291,6 +290,11 @@ struct __wt_txn { */ wt_timestamp_t prepare_timestamp; + /* + * Maximum durable timestamp of any data read by this transaction. + */ + wt_timestamp_t max_durable_timestamp_read; + /* Array of modifications by this transaction. */ WT_TXN_OP *mod; size_t mod_alloc; diff --git a/src/include/txn_inline.h b/src/include/txn_inline.h index 39013698dfa..580a1112eeb 100644 --- a/src/include/txn_inline.h +++ b/src/include/txn_inline.h @@ -581,41 +581,6 @@ __wt_txn_upd_value_visible_all(WT_SESSION_IMPL *session, WT_UPDATE_VALUE *upd_va __wt_txn_visible_all(session, upd_value->tw.start_txn, upd_value->tw.durable_start_ts)); } -/* - * __wt_txn_ts_nondurable -- - * Is the given timestamp not yet durable? It is durable unless we are reading between the - * commit time the durable time *and* the global stable timestamp has not yet advanced past the - * durable time. (Note that it is not sufficient to just check that the global stable timestamp - * has advanced past the commit time; prepared transactions are allowed to commit earlier than - * stable if they were prepared after stable and stable has since moved up.) Assumes caller has - * already checked that the commit time is visible, so does not repeat checks in that code. - */ -static inline bool -__wt_txn_ts_nondurable( - WT_SESSION_IMPL *session, uint64_t txnid, uint64_t commit_ts, uint64_t durable_ts) -{ - WT_TXN_GLOBAL *txn_global; - - txn_global = &S2C(session)->txn_global; - - /* If there is no gap between commit time and durable time, we can't read between them. */ - if (durable_ts == commit_ts) - return (false); - - /* - * If the durable time is stable, the value might or might not actually be durable (depending on - * whether a checkpoint has been completed) but we (the caller) cannot become durable before it, - * so for us it counts as durable. The purpose of this check is to make sure we can't read a - * nondurable value, then commit and become durable before it, because that violates consistency - * expectations. - */ - if (durable_ts <= txn_global->stable_timestamp) - return (false); - - /* Otherwise, if the durable time is not visible, we are reading before it. */ - return (!__wt_txn_visible(session, txnid, durable_ts)); -} - /* * __wt_txn_tw_stop_visible -- * Is the given stop time window visible? @@ -627,18 +592,6 @@ __wt_txn_tw_stop_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) __wt_txn_visible(session, tw->stop_txn, tw->stop_ts)); } -/* - * __wt_txn_tw_stop_nondurable -- - * Is the given stop time window not yet durable? See notes about the start time below. Assumes - * the caller has already checked that the stop time is visible, so among other things we know - * that the time window *does* have a stop time. - */ -static inline bool -__wt_txn_tw_stop_nondurable(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) -{ - return (__wt_txn_ts_nondurable(session, tw->stop_txn, tw->stop_ts, tw->durable_stop_ts)); -} - /* * __wt_txn_tw_start_visible -- * Is the given start time window visible? @@ -657,17 +610,6 @@ __wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) __wt_txn_visible(session, tw->start_txn, tw->start_ts)); } -/* - * __wt_txn_tw_start_nondurable -- - * Is the given start time window not yet durable? Assumes caller has already checked that the - * start time is visible. - */ -static inline bool -__wt_txn_tw_start_nondurable(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) -{ - return (__wt_txn_ts_nondurable(session, tw->start_txn, tw->start_ts, tw->durable_start_ts)); -} - /* * __wt_txn_tw_start_visible_all -- * Is the given start time window visible to all (possible) readers? @@ -796,12 +738,9 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t timestamp static inline WT_VISIBLE_TYPE __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd) { - WT_TXN_GLOBAL *txn_global; uint8_t prepare_state, previous_state; bool upd_visible; - txn_global = &S2C(session)->txn_global; - for (;; __wt_yield()) { /* Prepare state change is in progress, yield and try again. */ WT_ORDERED_READ(prepare_state, upd->prepare_state); @@ -814,12 +753,6 @@ __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd) return (WT_VISIBLE_TRUE); upd_visible = __wt_txn_visible(session, upd->txnid, upd->start_ts); - if (upd_visible && upd->durable_ts > upd->start_ts && - upd->durable_ts > txn_global->stable_timestamp) { - upd_visible = __wt_txn_visible(session, upd->txnid, upd->durable_ts); - if (!upd_visible) - return (WT_VISIBLE_NONDURABLE); - } /* * The visibility check is only valid if the update does not change state. If the state does @@ -954,15 +887,6 @@ __wt_txn_read_upd_list_internal(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (upd_visible == WT_VISIBLE_TRUE) break; - if (upd_visible == WT_VISIBLE_NONDURABLE) { - if (F_ISSET(session->txn, WT_TXN_IGNORE_PREPARE)) - /* With ignore_prepare, nondurable values are fully visible. */ - break; - else - WT_RET_MSG(session, WT_PREPARE_CONFLICT, - "read conflict with committed but non-durable value"); - } - /* * Save the prepared update to help us detect if we race with prepared commit or rollback * irrespective of update visibility. @@ -994,6 +918,10 @@ __wt_txn_read_upd_list_internal(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (upd == NULL) return (0); + if (session->isolation != WT_ISO_READ_UNCOMMITTED && cbt->upd_value->skip_buf == false) + session->txn->max_durable_timestamp_read = + WT_MAX(session->txn->max_durable_timestamp_read, upd->durable_ts); + /* * Now assign to the update value. If it's not a modify, we're free to simply point the value at * the update's memory without owning it. If it is a modify, we need to reconstruct the full @@ -1032,11 +960,10 @@ __wt_txn_read( { WT_TIME_WINDOW tw; WT_UPDATE *prepare_upd, *restored_upd; - bool have_stop_tw, retry, return_onpage_value; + bool have_stop_tw, retry; prepare_upd = restored_upd = NULL; retry = true; - return_onpage_value = false; retry: WT_RET(__wt_txn_read_upd_list_internal(session, cbt, upd, &prepare_upd, &restored_upd)); @@ -1078,11 +1005,9 @@ __wt_txn_read( */ if (!have_stop_tw && __wt_txn_tw_stop_visible(session, &tw) && !F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE)) { - if (__wt_txn_tw_stop_nondurable(session, &tw) && - !F_ISSET(session->txn, WT_TXN_IGNORE_PREPARE)) - /* Disallow reading if we can see it but it isn't durable yet. */ - WT_RET_MSG(session, WT_PREPARE_CONFLICT, - "read conflict with committed but non-durable value"); + if (session->isolation != WT_ISO_READ_UNCOMMITTED && cbt->upd_value->skip_buf == false) + session->txn->max_durable_timestamp_read = + WT_MAX(session->txn->max_durable_timestamp_read, tw.durable_stop_ts); cbt->upd_value->buf.data = NULL; cbt->upd_value->buf.size = 0; cbt->upd_value->tw.durable_stop_ts = tw.durable_stop_ts; @@ -1106,18 +1031,10 @@ __wt_txn_read( * 1. The record is from the history store. * 2. It is visible to the reader. */ - if (WT_IS_HS(session->dhandle)) - return_onpage_value = true; - else if (__wt_txn_tw_start_visible(session, &tw)) { - if (__wt_txn_tw_start_nondurable(session, &tw) && - !F_ISSET(session->txn, WT_TXN_IGNORE_PREPARE)) - /* Disallow reading if we can see it but it isn't durable yet. */ - WT_RET_MSG(session, WT_PREPARE_CONFLICT, - "read conflict with committed but non-durable value"); - return_onpage_value = true; - } - - if (return_onpage_value) { + if (WT_IS_HS(session->dhandle) || __wt_txn_tw_start_visible(session, &tw)) { + if (session->isolation != WT_ISO_READ_UNCOMMITTED && cbt->upd_value->skip_buf == false) + session->txn->max_durable_timestamp_read = + WT_MAX(session->txn->max_durable_timestamp_read, tw.durable_start_ts); if (cbt->upd_value->skip_buf) { cbt->upd_value->buf.data = NULL; cbt->upd_value->buf.size = 0; @@ -1187,6 +1104,12 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, !F_ISSET(txn, WT_TXN_RUNNING)); + /* + * Clear the maximum durable timestamp that we've seen. It might not be empty if there have been + * preceding reads without an explicit transaction. + */ + txn->max_durable_timestamp_read = WT_TS_NONE; + WT_RET(__wt_txn_config(session, cfg)); /* diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index eab58ab9299..d736f8f60dd 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -3834,8 +3834,7 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp) * Conflict with a prepared update. * This error is generated when the application attempts to read an updated * record which is part of a transaction that has been prepared but not yet - * resolved, or part of a transaction that has been prepared and committed but - * is not yet durable. + * resolved. */ #define WT_PREPARE_CONFLICT (-31808) /*! diff --git a/src/txn/txn.c b/src/txn/txn.c index 430e7417410..c98ff57f8b5 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -706,6 +706,7 @@ __wt_txn_release(WT_SESSION_IMPL *session) */ txn->flags = 0; txn->prepare_timestamp = WT_TS_NONE; + txn->max_durable_timestamp_read = WT_TS_NONE; /* Clear operation timer. */ txn->operation_timeout_us = 0; diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 9eb71ab739f..60ac959e901 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -1015,7 +1015,12 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * Commit the transaction now that we are sure that all files in the checkpoint have been * flushed to disk. It's OK to commit before checkpointing the metadata since we know that all * files in the checkpoint are now in a consistent state. + * + * Clear the maximum durable timestamp read before committing, since we explicitly allow + * nondurable values to be checkpointed. (They are cleaned out when needed by rollback to + * stable.) */ + session->txn->max_durable_timestamp_read = WT_TS_NONE; WT_ERR(__wt_txn_commit(session, NULL)); /* diff --git a/src/txn/txn_timestamp.c b/src/txn/txn_timestamp.c index 99bbc505c7a..9ea4f24fdb8 100644 --- a/src/txn/txn_timestamp.c +++ b/src/txn/txn_timestamp.c @@ -680,6 +680,14 @@ __wt_txn_set_durable_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t durable_ __wt_timestamp_to_string(durable_ts, ts_string[0]), __wt_timestamp_to_string(txn->commit_timestamp, ts_string[1])); + /* Transactions that have made writes must not commit into the past of things they read. */ + if (txn->mod_count > 0 && durable_ts != WT_TS_NONE && + durable_ts < txn->max_durable_timestamp_read) + WT_RET_MSG(session, EINVAL, + "durable timestamp %s is before the durable timestamp %s of a value it read", + __wt_timestamp_to_string(durable_ts, ts_string[0]), + __wt_timestamp_to_string(txn->max_durable_timestamp_read, ts_string[1])); + txn->durable_timestamp = durable_ts; F_SET(txn, WT_TXN_HAS_TS_DURABLE); diff --git a/test/suite/test_durable_ts03.py b/test/suite/test_durable_ts03.py index cf9abd5bc49..6fea0ac2e73 100755 --- a/test/suite/test_durable_ts03.py +++ b/test/suite/test_durable_ts03.py @@ -97,12 +97,11 @@ def test_durable_ts03(self): self.assertEqual(value, valueA) session.commit_transaction() - # Check that the updated data cannot be read while it is not yet durable. + # Check that the updated data can still be read even while it is not yet durable. self.assertEquals(cursor.reset(), 0) session.begin_transaction('read_timestamp=' + self.timestamp_str(210)) - cursor.set_key(1) - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: cursor.search(), '/committed but non-durable value/') + for key, value in cursor: + self.assertEqual(value, valueB) session.rollback_transaction() # Read the updated data to confirm that it is visible. diff --git a/test/suite/test_hs06.py b/test/suite/test_hs06.py index d57a6dc4c5c..38e7756c405 100644 --- a/test/suite/test_hs06.py +++ b/test/suite/test_hs06.py @@ -266,11 +266,10 @@ def test_hs_prepare_reads(self): prepare_session.commit_transaction( 'commit_timestamp=' + self.timestamp_str(5) + ',durable_timestamp=' + self.timestamp_str(6)) - # Cannot read between commit and durable. + # Specifically check that we can read between commit and durable. self.session.begin_transaction('read_timestamp=' + self.timestamp_str(5)) for i in range(1, 11): - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: cursor[self.create_key(i)], '/committed but non-durable value/') + self.assertEquals(value2, cursor[self.create_key(i)]) self.session.rollback_transaction() self.session.begin_transaction('read_timestamp=' + self.timestamp_str(6)) From 9c2105d598a036694296e634126c433d3e22b226 Mon Sep 17 00:00:00 2001 From: Luke Chen Date: Wed, 23 Feb 2022 14:41:55 +1100 Subject: [PATCH 12/20] WT-8866 Disable LSM performance tests in Evergreen (#7576) --- test/evergreen.yml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/test/evergreen.yml b/test/evergreen.yml index a1f7098fe2d..22f90dacd05 100755 --- a/test/evergreen.yml +++ b/test/evergreen.yml @@ -2938,7 +2938,8 @@ tasks: perf-test-name: update-checkpoint-btree.wtperf - name: perf-test-update-checkpoint-lsm - tags: ["checkpoint-perf"] + # Disable/Un-tag the LSM perf test till the support for LSM is restored. + # tags: ["checkpoint-perf"] depends_on: - name: compile commands: @@ -3185,7 +3186,8 @@ tasks: perf-test-name: evict-btree-1.wtperf - name: perf-test-evict-lsm - tags: ["evict-perf"] + # Disable/Un-tag the LSM perf test till the support for LSM is restored. + # tags: ["evict-perf"] depends_on: - name: compile commands: @@ -3200,7 +3202,8 @@ tasks: perf-test-name: evict-lsm.wtperf - name: perf-test-evict-lsm-1 - tags: ["evict-perf"] + # Disable/Un-tag the LSM perf test till the support for LSM is restored. + # tags: ["evict-perf"] depends_on: - name: compile commands: @@ -3735,7 +3738,8 @@ buildvariants: tasks: - name: compile - name: ".btree-perf" - - name: ".lsm-perf" + # Disable LSM perf tests till the support for LSM is restored. + # - name: ".lsm-perf" - name: ".stress-perf" - name: ".checkpoint-perf" - name: ".evict-perf" @@ -3746,9 +3750,10 @@ buildvariants: - name: Wiredtiger-perf-btree-jobs execution_tasks: - ".btree-perf" - - name: Wiredtiger-perf-lsm-jobs - execution_tasks: - - ".lsm-perf" + # Disable LSM perf tests till the support for LSM is restored. + # - name: Wiredtiger-perf-lsm-jobs + # execution_tasks: + # - ".lsm-perf" - name: Wiredtiger-perf-stress-jobs execution_tasks: - ".stress-perf" From 0fb96bc8d77cd59180199f755436b39ea999500a Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 23 Feb 2022 13:24:14 -0800 Subject: [PATCH 13/20] WT-8708 Fix timestamp usage error in test/checkpoint (#7582) Don't read g.ts_stable until it's actually been set by set_stable(), otherwise we can try and set the oldest timestamp after the stable timestamp. --- test/checkpoint/checkpointer.c | 12 +++++++----- test/checkpoint/test_checkpoint.h | 4 ++-- test/checkpoint/workers.c | 8 ++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/test/checkpoint/checkpointer.c b/test/checkpoint/checkpointer.c index 6d92908124a..3239fb0bfed 100644 --- a/test/checkpoint/checkpointer.c +++ b/test/checkpoint/checkpointer.c @@ -44,10 +44,10 @@ set_stable(void) char buf[128]; if (g.race_timetamps) - testutil_check(__wt_snprintf( - buf, sizeof(buf), "stable_timestamp=%x,oldest_timestamp=%x", g.ts_stable, g.ts_stable)); + testutil_check(__wt_snprintf(buf, sizeof(buf), + "stable_timestamp=%" PRIx64 ",oldest_timestamp=%" PRIx64, g.ts_stable, g.ts_stable)); else - testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%x", g.ts_stable)); + testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%" PRIx64, g.ts_stable)); testutil_check(g.conn->set_timestamp(g.conn, buf)); } @@ -202,7 +202,9 @@ real_checkpointer(void) verify_ts = stable_ts; else verify_ts = __wt_random(&rnd) % (stable_ts - oldest_ts + 1) + oldest_ts; - WT_ORDERED_READ(g.ts_oldest, g.ts_stable); + __wt_writelock((WT_SESSION_IMPL *)session, &g.clock_lock); + g.ts_oldest = g.ts_stable; + __wt_writeunlock((WT_SESSION_IMPL *)session, &g.clock_lock); } /* Execute a checkpoint */ @@ -225,7 +227,7 @@ real_checkpointer(void) /* Advance the oldest timestamp to the most recently set stable timestamp. */ if (g.use_timestamps && g.ts_oldest != 0) { testutil_check(__wt_snprintf( - timestamp_buf, sizeof(timestamp_buf), "oldest_timestamp=%x", g.ts_oldest)); + timestamp_buf, sizeof(timestamp_buf), "oldest_timestamp=%" PRIx64, g.ts_oldest)); testutil_check(g.conn->set_timestamp(g.conn, timestamp_buf)); } /* Random value between 4 and 8 seconds. */ diff --git a/test/checkpoint/test_checkpoint.h b/test/checkpoint/test_checkpoint.h index adcbfdcb706..31689c4c32d 100644 --- a/test/checkpoint/test_checkpoint.h +++ b/test/checkpoint/test_checkpoint.h @@ -74,8 +74,8 @@ typedef struct { bool hs_checkpoint_timing_stress; /* History store checkpoint timing stress */ bool reserved_txnid_timing_stress; /* Reserved transaction id timing stress */ bool checkpoint_slow_timing_stress; /* Checkpoint slow timing stress */ - u_int ts_oldest; /* Current oldest timestamp */ - u_int ts_stable; /* Current stable timestamp */ + uint64_t ts_oldest; /* Current oldest timestamp */ + uint64_t ts_stable; /* Current stable timestamp */ bool mixed_mode_deletes; /* Run with mixed mode deletes */ bool use_timestamps; /* Use txn timestamps */ bool race_timetamps; /* Async update to oldest timestamp */ diff --git a/test/checkpoint/workers.c b/test/checkpoint/workers.c index 6764a68aa78..1c3ac198d88 100644 --- a/test/checkpoint/workers.c +++ b/test/checkpoint/workers.c @@ -424,18 +424,18 @@ real_worker(void) next_rnd = __wt_random(&rnd); if (g.prepare && next_rnd % 2 == 0) { testutil_check(__wt_snprintf( - buf, sizeof(buf), "prepare_timestamp=%x", g.ts_stable + 1)); + buf, sizeof(buf), "prepare_timestamp=%" PRIx64, g.ts_stable + 1)); if ((ret = session->prepare_transaction(session, buf)) != 0) { __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); (void)log_print_err("real_worker:prepare_transaction", ret, 1); goto err; } testutil_check(__wt_snprintf(buf, sizeof(buf), - "durable_timestamp=%x,commit_timestamp=%x", g.ts_stable + 3, - g.ts_stable + 1)); + "durable_timestamp=%" PRIx64 ",commit_timestamp=%" PRIx64, + g.ts_stable + 3, g.ts_stable + 1)); } else testutil_check(__wt_snprintf( - buf, sizeof(buf), "commit_timestamp=%x", g.ts_stable + 1)); + buf, sizeof(buf), "commit_timestamp=%" PRIx64, g.ts_stable + 1)); // Commit majority of times if (next_rnd % 49 != 0) { From 5907987018c156d7b525d2839660faa2803b3db2 Mon Sep 17 00:00:00 2001 From: Etienne Petrel <77254506+etienneptl@users.noreply.github.com> Date: Thu, 24 Feb 2022 08:55:36 +1100 Subject: [PATCH 14/20] WT-8706 Increase the cache size in all test_checkpoint scenarios (#7577) The cache size is now set to 1GB instead of 100MB by default. --- src/btree/bt_walk.c | 3 +-- test/checkpoint/CMakeLists.txt | 12 ++++++------ test/checkpoint/test_checkpoint.c | 11 +++++------ test/checkpoint/workers.c | 8 ++++---- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index dce4b1843e3..e3ee0c03bf7 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -255,8 +255,7 @@ __tree_walk_internal(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp restart_sleep = restart_yield = 0; empty_internal = false; - /* Ensure we have a snapshot to check visibility or we only check global visibility. - */ + /* Ensure we have a snapshot to check visibility or we only check global visibility. */ WT_ASSERT(session, LF_ISSET(WT_READ_VISIBLE_ALL) || F_ISSET(session->txn, WT_TXN_HAS_SNAPSHOT)); /* All current tree walks skip deleted pages. */ diff --git a/test/checkpoint/CMakeLists.txt b/test/checkpoint/CMakeLists.txt index d15dfb51d84..10cba09205c 100644 --- a/test/checkpoint/CMakeLists.txt +++ b/test/checkpoint/CMakeLists.txt @@ -22,24 +22,24 @@ define_test_variants(test_checkpoint "test_checkpoint_6_fixed_named;-t f -T 6 -c TeSt" "test_checkpoint_6_fixed_prepare;-t f -T 6 -p" "test_checkpoint_6_fixed_named_prepare;-t f -T 6 -c TeSt -p" - "test_checkpoint_fixed_stress_sweep_timestamps;-t f -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -C cache_size=100MB -D" - "test_checkpoint_fixed_sweep_timestamps;-t f -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -C cache_size=100MB" + "test_checkpoint_fixed_stress_sweep_timestamps;-t f -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -D" + "test_checkpoint_fixed_sweep_timestamps;-t f -W 3 -r 2 -s 1 -x -n 100000 -k 100000" # 3. VLCS cases. "test_checkpoint_6_column;-t c -T 6" "test_checkpoint_6_column_named;-t c -T 6 -c TeSt" "test_checkpoint_6_column_prepare;-t c -T 6 -p" "test_checkpoint_6_column_named_prepare;-t c -T 6 -c TeSt -p" - "test_checkpoint_column_stress_sweep_timestamps;-t c -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -C cache_size=100MB -D" - "test_checkpoint_column_sweep_timestamps;-t c -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -C cache_size=100MB" + "test_checkpoint_column_stress_sweep_timestamps;-t c -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -D" + "test_checkpoint_column_sweep_timestamps;-t c -W 3 -r 2 -s 1 -x -n 100000 -k 100000" # 4. Row-store cases. "test_checkpoint_6_row;-t r -T 6" "test_checkpoint_6_row_named;-t r -T 6 -c TeSt" "test_checkpoint_6_row_prepare;-t r -T 6 -p" "test_checkpoint_6_row_named_prepare;-t r -T 6 -c TeSt -p" - "test_checkpoint_row_stress_sweep_timestamps;-t r -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -C cache_size=100MB -D" - "test_checkpoint_row_sweep_timestamps;-t r -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -C cache_size=100MB" + "test_checkpoint_row_stress_sweep_timestamps;-t r -W 3 -r 2 -s 1 -x -n 100000 -k 100000 -D" + "test_checkpoint_row_sweep_timestamps;-t r -W 3 -r 2 -s 1 -x -n 100000 -k 100000" # 5. LSM cases. "test_checkpoint_6_lsm;-t l -T 6" diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c index a864b035563..4c3e84bd8ba 100644 --- a/test/checkpoint/test_checkpoint.c +++ b/test/checkpoint/test_checkpoint.c @@ -263,16 +263,15 @@ wt_connect(const char *config_open) }; int ret; char config[512]; - char timing_stress_cofing[512]; + char timing_stress_config[512]; bool timing_stress; timing_stress = false; - if (g.sweep_stress || g.failpoint_hs_delete_key_from_ts || g.failpoint_hs_insert_1 || g.failpoint_hs_insert_2 || g.hs_checkpoint_timing_stress || g.reserved_txnid_timing_stress || g.checkpoint_slow_timing_stress) { timing_stress = true; - testutil_check(__wt_snprintf(timing_stress_cofing, sizeof(timing_stress_cofing), + testutil_check(__wt_snprintf(timing_stress_config, sizeof(timing_stress_config), ",timing_stress_for_test=[%s%s%s%s%s]", g.sweep_stress ? "aggressive_sweep" : "", g.failpoint_hs_delete_key_from_ts ? "failpoint_history_store_delete_key_from_ts" : "", g.hs_checkpoint_timing_stress ? "history_store_checkpoint_delay" : "", @@ -288,14 +287,14 @@ wt_connect(const char *config_open) "create,cache_cursors=false,statistics=(fast),statistics_log=(json,wait=1),error_prefix=" "\"%s\",file_manager=(close_handle_minimum=1,close_idle_time=1,close_scan_interval=1)," "log=(enabled),cache_size=1GB%s%s%s%s", - progname, timing_stress_cofing, g.debug_mode ? DEBUG_MODE_CFG : "", + progname, timing_stress_config, g.debug_mode ? DEBUG_MODE_CFG : "", config_open == NULL ? "" : ",", config_open == NULL ? "" : config_open)); else { testutil_check(__wt_snprintf(config, sizeof(config), "create,cache_cursors=false,statistics=(fast),statistics_log=(json,wait=1),log=(enabled)," - "error_prefix=\"%s\"%s%s%s%s", + "error_prefix=\"%s\",cache_size=1G%s%s%s%s", progname, g.debug_mode ? DEBUG_MODE_CFG : "", config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open, timing_stress ? timing_stress_cofing : "")); + config_open == NULL ? "" : config_open, timing_stress ? timing_stress_config : "")); } printf("WT open config: %s\n", config); if ((ret = wiredtiger_open(g.home, &event_handler, config, &g.conn)) != 0) diff --git a/test/checkpoint/workers.c b/test/checkpoint/workers.c index 1c3ac198d88..80f6dbf1e3a 100644 --- a/test/checkpoint/workers.c +++ b/test/checkpoint/workers.c @@ -272,7 +272,7 @@ worker_op(WT_CURSOR *cursor, table_type type, uint64_t keyno, u_int new_val) testutil_check(cursor->reset(cursor)); } else { if (new_val % 39 < 30) { - // Do modify + /* Do modify. */ ret = cursor->search(cursor); if (ret == 0 && (type != FIX || !cursor_fix_at_zero(cursor))) { modify_build(entries, &nentries, new_val); @@ -298,7 +298,7 @@ worker_op(WT_CURSOR *cursor, table_type type, uint64_t keyno, u_int new_val) } } - // If key doesn't exist, turn modify into an insert. + /* If key doesn't exist, turn modify into an insert. */ testutil_check(__wt_snprintf(valuebuf, sizeof(valuebuf), "%052u", new_val)); if (type == FIX) cursor->set_value(cursor, flcs_encode(valuebuf)); @@ -437,7 +437,7 @@ real_worker(void) testutil_check(__wt_snprintf( buf, sizeof(buf), "commit_timestamp=%" PRIx64, g.ts_stable + 1)); - // Commit majority of times + /* Commit majority of times. */ if (next_rnd % 49 != 0) { if ((ret = session->commit_transaction(session, buf)) != 0) { __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); @@ -458,7 +458,7 @@ real_worker(void) reopen_cursors = true; } } else { - // Commit majority of times + /* Commit majority of times. */ if (next_rnd % 49 != 0) { if ((ret = session->commit_transaction(session, NULL)) != 0) { (void)log_print_err("real_worker:commit_transaction", ret, 1); From 978cae0d255ff68d6e4434013851694fffdd7065 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 23 Feb 2022 13:56:23 -0800 Subject: [PATCH 15/20] WT-6422 Document expectation around ignore_prepare (#7568) Add more documentation around the ignore_prepare configuration. --- dist/api_data.py | 5 ++--- src/docs/timestamp-prepare.dox | 20 ++++++++++++++++++++ src/include/wiredtiger.in | 7 +++---- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/dist/api_data.py b/dist/api_data.py index 56c88f2d814..c748f372067 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -1733,9 +1733,8 @@ def __ge__(self, other): whether to ignore the updates by other prepared transactions as part of read operations of this transaction. When \c true, forces the transaction to be read-only. Use \c force to ignore prepared updates - and permit writes (which can cause lost updates unless the application - knows something about the relationship between prepared transactions - and the updates that are ignoring them)''', + and permit writes (see @ref timestamp_prepare_ignore_prepare for + more information)''', choices=['false', 'force', 'true']), Config('isolation', '', r''' the isolation level for this transaction; defaults to the diff --git a/src/docs/timestamp-prepare.dox b/src/docs/timestamp-prepare.dox index fa57ae0c600..9fd5e6c5e35 100644 --- a/src/docs/timestamp-prepare.dox +++ b/src/docs/timestamp-prepare.dox @@ -69,4 +69,24 @@ that conflicts of this type cannot arise. Prepared transactions have their own configuration keyword for rounding timestamps; see @ref timestamp_roundup for more information. +@section timestamp_prepare_ignore_prepare Configuring ignore_prepare + +The WT_SESSION::begin_transaction method includes the \c ignore_prepare +configuration. Setting the \c ignore_prepare configuration to true causes +readers to ignore prepared transactional values, that is, returning read +values as if the prepared transaction didn't exist. This prevents readers +from seeing the ::WT_PREPARE_CONFLICT error, returning the data as it was +before the transaction was prepared. For this reason, applications using +\c ignore_prepare cannot rely on repeatable reads, as the same read after the +prepared transaction is resolved could return a different value. Additionally, +setting the \c ignore_prepare configuration also causes the transaction to +be read-only, and attempts to update items in the transaction will fail. + +\warning +The \c ignore_prepare configuration can also be set to \c force, which not +only causes readers to ignore prepared transactions, but also allows the +transaction to make updates. This can cause data inconsistency problems with +the commit or rollback of the prepared transaction, or the disappearance of +a prepared update by overwriting it. + */ diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index d736f8f60dd..6616c730d5c 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1693,10 +1693,9 @@ struct __wt_session { * @configstart{WT_SESSION.begin_transaction, see dist/api_data.py} * @config{ignore_prepare, whether to ignore the updates by other prepared transactions as * part of read operations of this transaction. When \c true\, forces the transaction to be - * read-only. Use \c force to ignore prepared updates and permit writes (which can cause - * lost updates unless the application knows something about the relationship between - * prepared transactions and the updates that are ignoring them)., a string\, chosen from - * the following options: \c "false"\, \c "force"\, \c "true"; default \c false.} + * read-only. Use \c force to ignore prepared updates and permit writes (see @ref + * timestamp_prepare_ignore_prepare for more information)., a string\, chosen from the + * following options: \c "false"\, \c "force"\, \c "true"; default \c false.} * @config{isolation, the isolation level for this transaction; defaults to the session's * isolation level., a string\, chosen from the following options: \c "read-uncommitted"\, * \c "read-committed"\, \c "snapshot"; default empty.} From 8d6399919e27f3207b932521d765b6100b8ab467 Mon Sep 17 00:00:00 2001 From: Chenhao Qu Date: Thu, 24 Feb 2022 13:49:37 +1100 Subject: [PATCH 16/20] WT-8695 Remove file_close_sync config (#7583) --- dist/api_data.py | 10 --- examples/c/ex_all.c | 2 + src/config/config_def.c | 76 ++++++++-------- src/conn/conn_api.c | 4 - src/include/connection.h | 25 +++--- src/include/wiredtiger.in | 7 -- src/txn/txn_ckpt.c | 9 +- test/csuite/incr_backup/main.c | 3 +- test/csuite/wt2323_join_visibility/main.c | 8 +- .../csuite/wt3135_search_near_collator/main.c | 4 +- test/cursor_order/cursor_order_file.c | 2 +- test/format/wts.c | 6 ++ test/suite/test_autoclose.py | 2 +- test/suite/test_backup05.py | 2 +- test/suite/test_base04.py | 2 +- test/suite/test_bug001.py | 8 +- test/suite/test_bug004.py | 4 +- test/suite/test_bug005.py | 6 +- test/suite/test_bug006.py | 10 +-- test/suite/test_bulk01.py | 1 + test/suite/test_collator.py | 2 +- test/suite/test_config08.py | 87 ------------------- test/suite/test_cursor13.py | 16 ++-- test/suite/test_drop.py | 2 +- test/suite/test_dupc.py | 4 +- test/suite/test_durability01.py | 2 +- test/suite/test_import01.py | 4 +- test/suite/test_import03.py | 2 +- test/suite/test_import04.py | 2 +- test/suite/test_import06.py | 2 +- test/suite/test_import08.py | 2 +- test/suite/test_import09.py | 2 +- test/suite/test_import10.py | 2 +- test/suite/test_index01.py | 2 +- test/suite/test_index03.py | 2 +- test/suite/test_join01.py | 2 +- test/suite/test_join03.py | 2 +- test/suite/test_join06.py | 2 +- test/suite/test_join07.py | 2 +- test/suite/test_join09.py | 2 +- test/suite/test_lsm02.py | 4 +- test/suite/test_lsm03.py | 2 +- test/suite/test_rename.py | 4 +- test/suite/test_salvage.py | 2 +- test/suite/test_schema01.py | 2 +- test/suite/test_schema05.py | 2 +- test/suite/test_schema06.py | 2 +- test/suite/test_schema07.py | 2 +- test/suite/test_sweep03.py | 2 +- test/suite/test_truncate01.py | 12 +-- test/suite/test_upgrade.py | 4 +- test/suite/test_util11.py | 14 +-- test/suite/test_verify.py | 2 +- test/suite/wttest.py | 50 +++++++++++ test/thread/file.c | 6 +- test/utility/test_util.h | 24 +++++ 56 files changed, 220 insertions(+), 248 deletions(-) delete mode 100644 test/suite/test_config08.py diff --git a/dist/api_data.py b/dist/api_data.py index c748f372067..cd26d359de2 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -1215,16 +1215,6 @@ def __ge__(self, other): size and the default config would extend log files in allocations of the maximum log file size.''', type='list', choices=['data', 'log']), - Config('file_close_sync', 'true', r''' - control whether to flush modified files to storage independent - of a global checkpoint when closing file handles to acquire exclusive - access to a table. If set to false, and logging is disabled, API calls that - require exclusive access to tables will return EBUSY if there have been - changes made to the table since the last global checkpoint. When logging - is enabled, the value for file_close_sync has no effect, and, - modified file is always flushed to storage when closing file handles to - acquire exclusive access to the table''', - type='boolean'), Config('hash', '', r''' manage resources around hash bucket arrays. All values must be a power of two. Note that setting large values can significantly increase memory usage inside diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index 3dab06fcf32..4247633cdb6 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -745,6 +745,8 @@ session_ops(WT_SESSION *session) } } + error_check(session->checkpoint(session, NULL)); + /*! [Upgrade a table] */ error_check(session->upgrade(session, "table:mytable", NULL)); /*! [Upgrade a table] */ diff --git a/src/config/config_def.c b/src/config/config_def.c index 85da105f2e6..37a2e2e35e3 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -863,7 +863,6 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { {"eviction_updates_target", "int", NULL, "min=0,max=10TB", NULL, 0}, {"eviction_updates_trigger", "int", NULL, "min=0,max=10TB", NULL, 0}, {"exclusive", "boolean", NULL, NULL, NULL, 0}, {"extensions", "list", NULL, NULL, NULL, 0}, - {"file_close_sync", "boolean", NULL, NULL, NULL, 0}, {"file_extend", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0}, {"file_manager", "category", NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3}, {"hash", "category", NULL, NULL, confchk_wiredtiger_open_hash_subconfigs, 2}, @@ -946,7 +945,6 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { {"eviction_updates_target", "int", NULL, "min=0,max=10TB", NULL, 0}, {"eviction_updates_trigger", "int", NULL, "min=0,max=10TB", NULL, 0}, {"exclusive", "boolean", NULL, NULL, NULL, 0}, {"extensions", "list", NULL, NULL, NULL, 0}, - {"file_close_sync", "boolean", NULL, NULL, NULL, 0}, {"file_extend", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0}, {"file_manager", "category", NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3}, {"hash", "category", NULL, NULL, confchk_wiredtiger_open_hash_subconfigs, 2}, @@ -1027,7 +1025,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { {"eviction_trigger", "int", NULL, "min=10,max=10TB", NULL, 0}, {"eviction_updates_target", "int", NULL, "min=0,max=10TB", NULL, 0}, {"eviction_updates_trigger", "int", NULL, "min=0,max=10TB", NULL, 0}, - {"extensions", "list", NULL, NULL, NULL, 0}, {"file_close_sync", "boolean", NULL, NULL, NULL, 0}, + {"extensions", "list", NULL, NULL, NULL, 0}, {"file_extend", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0}, {"file_manager", "category", NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3}, {"hash", "category", NULL, NULL, confchk_wiredtiger_open_hash_subconfigs, 2}, @@ -1105,7 +1103,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { {"eviction_trigger", "int", NULL, "min=10,max=10TB", NULL, 0}, {"eviction_updates_target", "int", NULL, "min=0,max=10TB", NULL, 0}, {"eviction_updates_trigger", "int", NULL, "min=0,max=10TB", NULL, 0}, - {"extensions", "list", NULL, NULL, NULL, 0}, {"file_close_sync", "boolean", NULL, NULL, NULL, 0}, + {"extensions", "list", NULL, NULL, NULL, 0}, {"file_extend", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0}, {"file_manager", "category", NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3}, {"hash", "category", NULL, NULL, confchk_wiredtiger_open_hash_subconfigs, 2}, @@ -1473,7 +1471,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", ",eviction_checkpoint_target=1,eviction_dirty_target=5," "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",eviction_updates_target=0,eviction_updates_trigger=0," - "exclusive=false,extensions=,file_close_sync=true,file_extend=," + "exclusive=false,extensions=,file_extend=," "file_manager=(close_handle_minimum=250,close_idle_time=30," "close_scan_interval=10),hash=(buckets=512,dhandle_buckets=512)," "hazard_max=1000,history_store=(file_max=0),in_memory=false," @@ -1494,7 +1492,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "transaction_sync=(enabled=false,method=fsync)," "use_environment=true,use_environment_priv=false,verbose=[]," "verify_metadata=false,write_through=", - confchk_wiredtiger_open, 59}, + confchk_wiredtiger_open, 58}, {"wiredtiger_open_all", "block_cache=(blkcache_eviction_aggression=1800," "cache_on_checkpoint=true,cache_on_writes=true,enabled=false," @@ -1513,7 +1511,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", ",eviction_checkpoint_target=1,eviction_dirty_target=5," "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",eviction_updates_target=0,eviction_updates_trigger=0," - "exclusive=false,extensions=,file_close_sync=true,file_extend=," + "exclusive=false,extensions=,file_extend=," "file_manager=(close_handle_minimum=250,close_idle_time=30," "close_scan_interval=10),hash=(buckets=512,dhandle_buckets=512)," "hazard_max=1000,history_store=(file_max=0),in_memory=false," @@ -1534,7 +1532,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "transaction_sync=(enabled=false,method=fsync)," "use_environment=true,use_environment_priv=false,verbose=[]," "verify_metadata=false,version=(major=0,minor=0),write_through=", - confchk_wiredtiger_open_all, 60}, + confchk_wiredtiger_open_all, 59}, {"wiredtiger_open_basecfg", "block_cache=(blkcache_eviction_aggression=1800," "cache_on_checkpoint=true,cache_on_writes=true,enabled=false," @@ -1553,27 +1551,26 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "eviction_checkpoint_target=1,eviction_dirty_target=5," "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",eviction_updates_target=0,eviction_updates_trigger=0," - "extensions=,file_close_sync=true,file_extend=," - "file_manager=(close_handle_minimum=250,close_idle_time=30," - "close_scan_interval=10),hash=(buckets=512,dhandle_buckets=512)," - "hazard_max=1000,history_store=(file_max=0),io_capacity=(total=0)" - ",json_output=[],log=(archive=true,compressor=,enabled=false," - "file_max=100MB,os_cache_dirty_pct=0,path=\".\",prealloc=true," - "recover=on,remove=true,zero_fill=false),lsm_manager=(merge=true," - "worker_thread_max=4),mmap=true,mmap_all=false,multiprocess=false" - ",operation_timeout_ms=0,operation_tracking=(enabled=false," - "path=\".\"),readonly=false,salvage=false,session_max=100," - "session_scratch_max=2MB,session_table_cache=true," - "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," - "statistics=none,statistics_log=(json=false,on_close=false," - "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "tiered_manager=(threads_max=8,threads_min=1,wait=0)," + "extensions=,file_extend=,file_manager=(close_handle_minimum=250," + "close_idle_time=30,close_scan_interval=10),hash=(buckets=512," + "dhandle_buckets=512),hazard_max=1000,history_store=(file_max=0)," + "io_capacity=(total=0),json_output=[],log=(archive=true," + "compressor=,enabled=false,file_max=100MB,os_cache_dirty_pct=0," + "path=\".\",prealloc=true,recover=on,remove=true,zero_fill=false)" + ",lsm_manager=(merge=true,worker_thread_max=4),mmap=true," + "mmap_all=false,multiprocess=false,operation_timeout_ms=0," + "operation_tracking=(enabled=false,path=\".\"),readonly=false," + "salvage=false,session_max=100,session_scratch_max=2MB," + "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0," + "reserve=0,size=500MB),statistics=none,statistics_log=(json=false" + ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"" + ",wait=0),tiered_manager=(threads_max=8,threads_min=1,wait=0)," "tiered_storage=(auth_token=,bucket=,bucket_prefix=," "cache_directory=,local_retention=300,name=," "object_target_size=10M),timing_stress_for_test=," "transaction_sync=(enabled=false,method=fsync),verbose=[]," "verify_metadata=false,version=(major=0,minor=0),write_through=", - confchk_wiredtiger_open_basecfg, 54}, + confchk_wiredtiger_open_basecfg, 53}, {"wiredtiger_open_usercfg", "block_cache=(blkcache_eviction_aggression=1800," "cache_on_checkpoint=true,cache_on_writes=true,enabled=false," @@ -1592,27 +1589,26 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "eviction_checkpoint_target=1,eviction_dirty_target=5," "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",eviction_updates_target=0,eviction_updates_trigger=0," - "extensions=,file_close_sync=true,file_extend=," - "file_manager=(close_handle_minimum=250,close_idle_time=30," - "close_scan_interval=10),hash=(buckets=512,dhandle_buckets=512)," - "hazard_max=1000,history_store=(file_max=0),io_capacity=(total=0)" - ",json_output=[],log=(archive=true,compressor=,enabled=false," - "file_max=100MB,os_cache_dirty_pct=0,path=\".\",prealloc=true," - "recover=on,remove=true,zero_fill=false),lsm_manager=(merge=true," - "worker_thread_max=4),mmap=true,mmap_all=false,multiprocess=false" - ",operation_timeout_ms=0,operation_tracking=(enabled=false," - "path=\".\"),readonly=false,salvage=false,session_max=100," - "session_scratch_max=2MB,session_table_cache=true," - "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," - "statistics=none,statistics_log=(json=false,on_close=false," - "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "tiered_manager=(threads_max=8,threads_min=1,wait=0)," + "extensions=,file_extend=,file_manager=(close_handle_minimum=250," + "close_idle_time=30,close_scan_interval=10),hash=(buckets=512," + "dhandle_buckets=512),hazard_max=1000,history_store=(file_max=0)," + "io_capacity=(total=0),json_output=[],log=(archive=true," + "compressor=,enabled=false,file_max=100MB,os_cache_dirty_pct=0," + "path=\".\",prealloc=true,recover=on,remove=true,zero_fill=false)" + ",lsm_manager=(merge=true,worker_thread_max=4),mmap=true," + "mmap_all=false,multiprocess=false,operation_timeout_ms=0," + "operation_tracking=(enabled=false,path=\".\"),readonly=false," + "salvage=false,session_max=100,session_scratch_max=2MB," + "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0," + "reserve=0,size=500MB),statistics=none,statistics_log=(json=false" + ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"" + ",wait=0),tiered_manager=(threads_max=8,threads_min=1,wait=0)," "tiered_storage=(auth_token=,bucket=,bucket_prefix=," "cache_directory=,local_retention=300,name=," "object_target_size=10M),timing_stress_for_test=," "transaction_sync=(enabled=false,method=fsync),verbose=[]," "verify_metadata=false,write_through=", - confchk_wiredtiger_open_usercfg, 53}, + confchk_wiredtiger_open_usercfg, 52}, {NULL, NULL, NULL, 0}}; int diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 65e9cd3b35e..8fd648cae8e 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -2839,10 +2839,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c if (cval.val) F_SET(conn, WT_CONN_CKPT_SYNC); - WT_ERR(__wt_config_gets(session, cfg, "file_close_sync", &cval)); - if (cval.val) - F_SET(conn, WT_CONN_FILE_CLOSE_SYNC); - WT_ERR(__wt_config_gets(session, cfg, "file_extend", &cval)); /* * If the log extend length is not set use the default of the configured maximum log file size. diff --git a/src/include/connection.h b/src/include/connection.h index 2d75b90d608..48aa7ccc50d 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -626,19 +626,18 @@ struct __wt_connection_impl { #define WT_CONN_COMPATIBILITY 0x000080u #define WT_CONN_DATA_CORRUPTION 0x000100u #define WT_CONN_EVICTION_RUN 0x000200u -#define WT_CONN_FILE_CLOSE_SYNC 0x000400u -#define WT_CONN_HS_OPEN 0x000800u -#define WT_CONN_INCR_BACKUP 0x001000u -#define WT_CONN_IN_MEMORY 0x002000u -#define WT_CONN_LEAK_MEMORY 0x004000u -#define WT_CONN_LSM_MERGE 0x008000u -#define WT_CONN_OPTRACK 0x010000u -#define WT_CONN_PANIC 0x020000u -#define WT_CONN_READONLY 0x040000u -#define WT_CONN_RECONFIGURING 0x080000u -#define WT_CONN_RECOVERING 0x100000u -#define WT_CONN_SALVAGE 0x200000u -#define WT_CONN_WAS_BACKUP 0x400000u +#define WT_CONN_HS_OPEN 0x000400u +#define WT_CONN_INCR_BACKUP 0x000800u +#define WT_CONN_IN_MEMORY 0x001000u +#define WT_CONN_LEAK_MEMORY 0x002000u +#define WT_CONN_LSM_MERGE 0x004000u +#define WT_CONN_OPTRACK 0x008000u +#define WT_CONN_PANIC 0x010000u +#define WT_CONN_READONLY 0x020000u +#define WT_CONN_RECONFIGURING 0x040000u +#define WT_CONN_RECOVERING 0x080000u +#define WT_CONN_SALVAGE 0x100000u +#define WT_CONN_WAS_BACKUP 0x200000u /* AUTOMATIC FLAG VALUE GENERATION STOP 32 */ uint32_t flags; }; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 6616c730d5c..1629da06cfa 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -2979,13 +2979,6 @@ struct __wt_connection { * specified to a library extension are passed to WT_CONNECTION::load_extension as the \c config * parameter (for example\, extensions=(/path/ext.so={entry=my_entry}))., a list of * strings; default empty.} - * @config{file_close_sync, control whether to flush modified files to storage independent of a - * global checkpoint when closing file handles to acquire exclusive access to a table. If set to - * false\, and logging is disabled\, API calls that require exclusive access to tables will return - * EBUSY if there have been changes made to the table since the last global checkpoint. When - * logging is enabled\, the value for file_close_sync has no effect\, and\, modified - * file is always flushed to storage when closing file handles to acquire exclusive access to the - * table., a boolean flag; default \c true.} * @config{file_extend, file extension configuration. If set\, extend files of the set type in * allocations of the set size\, instead of a block at a time as each new block is written. For * example\, file_extend=(data=16MB). If set to 0\, disable the file extension for the diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 60ac959e901..366e3f83e2d 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -2103,13 +2103,10 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) return (__wt_evict_file(session, WT_SYNC_DISCARD)); /* - * Don't flush data from modified trees independent of system-wide checkpoint when either there - * is a stable timestamp set or the connection is configured to disallow such operation. - * Flushing trees can lead to files that are inconsistent on disk after a crash. + * Don't flush data from modified trees independent of system-wide checkpoint. Flushing trees + * can lead to files that are inconsistent on disk after a crash. */ - if (btree->modified && !bulk && !F_ISSET(btree, WT_BTREE_LOGGED) && - (S2C(session)->txn_global.has_stable_timestamp || - (!F_ISSET(S2C(session), WT_CONN_FILE_CLOSE_SYNC) && !metadata))) + if (btree->modified && !bulk && !metadata) return (__wt_set_return(session, EBUSY)); /* diff --git a/test/csuite/incr_backup/main.c b/test/csuite/incr_backup/main.c index 2b5329dc02d..042c3adc44f 100644 --- a/test/csuite/incr_backup/main.c +++ b/test/csuite/incr_backup/main.c @@ -473,6 +473,7 @@ rename_table(WT_SESSION *session, TABLE_INFO *tinfo, uint32_t slot) olduri = tinfo->table[slot].name; VERBOSE(3, "rename %s %s\n", olduri, uri); + testutil_check(session->checkpoint(session, NULL)); testutil_check(session->rename(session, olduri, uri, NULL)); free(olduri); tinfo->table[slot].name = uri; @@ -491,7 +492,7 @@ drop_table(WT_SESSION *session, TABLE_INFO *tinfo, uint32_t slot) uri = tinfo->table[slot].name; VERBOSE(3, "drop %s\n", uri); - testutil_check(session->drop(session, uri, NULL)); + testutil_drop(session, uri, NULL); free(uri); tinfo->table[slot].name = NULL; tinfo->table[slot].change_count = 0; diff --git a/test/csuite/wt2323_join_visibility/main.c b/test/csuite/wt2323_join_visibility/main.c index 304bb441e87..7497a73864e 100644 --- a/test/csuite/wt2323_join_visibility/main.c +++ b/test/csuite/wt2323_join_visibility/main.c @@ -210,10 +210,10 @@ test_join(TEST_OPTS *opts, SHARED_OPTS *sharedopts, bool bloom, bool sometimes_r insert_args[i].inserts, insert_args[i].removes, insert_args[i].notfounds, insert_args[i].rollbacks); - testutil_check(session->drop(session, sharedopts->posturi, NULL)); - testutil_check(session->drop(session, sharedopts->baluri, NULL)); - testutil_check(session->drop(session, sharedopts->flaguri, NULL)); - testutil_check(session->drop(session, opts->uri, NULL)); + testutil_drop(session, sharedopts->posturi, NULL); + testutil_drop(session, sharedopts->baluri, NULL); + testutil_drop(session, sharedopts->flaguri, NULL); + testutil_drop(session, opts->uri, NULL); testutil_check(session->close(session, NULL)); } diff --git a/test/csuite/wt3135_search_near_collator/main.c b/test/csuite/wt3135_search_near_collator/main.c index 841bd27adf6..f0ba1b330c5 100644 --- a/test/csuite/wt3135_search_near_collator/main.c +++ b/test/csuite/wt3135_search_near_collator/main.c @@ -327,8 +327,8 @@ test_one_set(WT_SESSION *session, TEST_SET set) search_using_item(cursor, set, i); testutil_check(cursor->close(cursor)); - testutil_check(session->drop(session, "table:main", NULL)); - testutil_check(session->drop(session, "table:main2", NULL)); + testutil_drop(session, "table:main", NULL); + testutil_drop(session, "table:main2", NULL); } /* diff --git a/test/cursor_order/cursor_order_file.c b/test/cursor_order/cursor_order_file.c index 74addeb001a..690e8029f43 100644 --- a/test/cursor_order/cursor_order_file.c +++ b/test/cursor_order/cursor_order_file.c @@ -124,7 +124,7 @@ verify(SHARED_CONFIG *cfg, const char *name) testutil_check(conn->open_session(conn, NULL, NULL, &session)); - testutil_check(session->verify(session, name, NULL)); + testutil_verify(session, name, NULL); testutil_check(session->close(session, NULL)); } diff --git a/test/format/wts.c b/test/format/wts.c index 93efaeeac80..92f8f6bbf5a 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -544,6 +544,12 @@ wts_verify(TABLE *table, void *arg) * LSM, the handle may not be available for a long time. */ testutil_check(conn->open_session(conn, NULL, NULL, &session)); + /* + * Do a full checkpoint to reduce the possibility of returning EBUSY from the following verify + * call. + */ + ret = session->checkpoint(session, NULL); + testutil_assert(ret == 0 || ret == EBUSY); session->app_private = table->track_prefix; ret = session->verify(session, table->uri, "strict"); testutil_assert(ret == 0 || ret == EBUSY); diff --git a/test/suite/test_autoclose.py b/test/suite/test_autoclose.py index b934ae8d1fb..7259c29c30f 100755 --- a/test/suite/test_autoclose.py +++ b/test/suite/test_autoclose.py @@ -42,7 +42,7 @@ def create_table(self): 'key_format=S,value_format=S') def drop_table(self): - self.session.drop(self.uri, None) + self.dropUntilSuccess(self.session, self.uri) def open_cursor(self): cursor = self.session.open_cursor(self.uri, None, None) diff --git a/test/suite/test_backup05.py b/test/suite/test_backup05.py index 55803060866..69efc3078a0 100644 --- a/test/suite/test_backup05.py +++ b/test/suite/test_backup05.py @@ -112,7 +112,7 @@ def backup(self): if i % self.freq == 0: self.check_manual_backup(i, ".", "RESTART") else: - self.session.verify(self.uri) + self.verifyUntilSuccess(self.session, self.uri) def test_backup(self): with self.expectedStdoutPattern('recreating metadata'): diff --git a/test/suite/test_base04.py b/test/suite/test_base04.py index 39a8f24cc47..6a4790320ba 100644 --- a/test/suite/test_base04.py +++ b/test/suite/test_base04.py @@ -46,7 +46,7 @@ def create_table(self): def drop_table(self): self.pr('drop table') - self.session.drop(self.tablename, None) + self.dropUntilSuccess(self.session, self.tablename) def cursor(self): self.pr('open cursor') diff --git a/test/suite/test_bug001.py b/test/suite/test_bug001.py index 25620d6bd6a..d780abf24f0 100644 --- a/test/suite/test_bug001.py +++ b/test/suite/test_bug001.py @@ -74,7 +74,7 @@ def test_implicit_record_cursor_movement(self): self.assertEqual(cursor.prev(), 0) self.assertEquals(cursor.close(), 0) - self.session.drop(uri) + self.dropUntilSuccess(self.session, uri) cursor = self.create_implicit(uri, 20, 50, 0) # Check search inside leading implicit keys. @@ -98,7 +98,7 @@ def test_implicit_record_cursor_movement(self): self.assertEqual(cursor.prev(), 0) self.assertEquals(cursor.close(), 0) - self.session.drop(uri) + self.dropUntilSuccess(self.session, uri) # Test a bug where cursor remove inside implicit records looped infinitely. def test_implicit_record_cursor_remove(self): @@ -124,7 +124,7 @@ def test_implicit_record_cursor_remove(self): self.assertEquals(cursor.remove(), 0) self.assertEquals(cursor.close(), 0) - self.session.drop(uri) + self.dropUntilSuccess(self.session, uri) cursor = self.create_implicit(uri, 20, 50, 0) # Check cursor next/remove inside leading implicit keys. @@ -146,7 +146,7 @@ def test_implicit_record_cursor_remove(self): self.assertEquals(cursor.remove(), 0) self.assertEquals(cursor.close(), 0) - self.session.drop(uri) + self.dropUntilSuccess(self.session, uri) if __name__ == '__main__': wttest.run() diff --git a/test/suite/test_bug004.py b/test/suite/test_bug004.py index a3cbc7d55ab..4338846e049 100644 --- a/test/suite/test_bug004.py +++ b/test/suite/test_bug004.py @@ -74,9 +74,9 @@ def test_bug004(self): c1.close() # Verify the object, force it to disk, and verify the on-disk version. - self.session.verify(self.uri) + self.verifyUntilSuccess(self.session, self.uri) self.reopen_conn() - self.session.verify(self.uri) + self.verifyUntilSuccess(self.session, self.uri) # Create a new session and start a transaction to force the engine # to access old versions of the key/value pairs. diff --git a/test/suite/test_bug005.py b/test/suite/test_bug005.py index 662d2f81061..9b7be91be89 100644 --- a/test/suite/test_bug005.py +++ b/test/suite/test_bug005.py @@ -51,9 +51,9 @@ def test_bug005(self): cursor.close() # Verify the object, force it to disk, and verify the on-disk version. - self.session.verify(self.uri) + self.verifyUntilSuccess(self.session, self.uri) self.reopen_conn() - self.session.verify(self.uri) + self.verifyUntilSuccess(self.session, self.uri) # Append random data to the end. f = open('test_bug005', 'a') @@ -61,7 +61,7 @@ def test_bug005(self): f.close() # Verify the object again. - self.session.verify(self.uri) + self.verifyUntilSuccess(self.session, self.uri) if __name__ == '__main__': wttest.run() diff --git a/test/suite/test_bug006.py b/test/suite/test_bug006.py index f2aafb5c0ef..835a3caa506 100644 --- a/test/suite/test_bug006.py +++ b/test/suite/test_bug006.py @@ -65,14 +65,14 @@ def test_bug006(self): cursor.close() # Table operations should succeed, the cursor is closed. - self.session.rename(uri, self.uri + "new", None) - self.session.rename(self.uri + "new", uri, None) + self.renameUntilSuccess(self.session, uri, self.uri + "new") + self.renameUntilSuccess(self.session, self.uri + "new", uri) self.session.salvage(uri, None) self.session.truncate(uri, None, None, None) - self.session.upgrade(uri, None) - self.session.verify(uri, None) + self.upgradeUntilSuccess(self.session, uri) + self.verifyUntilSuccess(self.session, uri) - self.session.drop(uri, None) + self.dropUntilSuccess(self.session, uri) if __name__ == '__main__': wttest.run() diff --git a/test/suite/test_bulk01.py b/test/suite/test_bulk01.py index 7e7473b57f1..76893efd9ce 100755 --- a/test/suite/test_bulk01.py +++ b/test/suite/test_bulk01.py @@ -200,6 +200,7 @@ def test_bulk_load_not_empty(self): cursor[simple_key(cursor, 1)] = simple_value(cursor, 1) # Close the insert cursor, else we'll get EBUSY. cursor.close() + self.session.checkpoint() msg = '/bulk-load is only supported on newly created objects/' self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.open_cursor(uri, None, "bulk"), msg) diff --git a/test/suite/test_collator.py b/test/suite/test_collator.py index d884ac47cf7..64c0e9151d3 100644 --- a/test/suite/test_collator.py +++ b/test/suite/test_collator.py @@ -64,7 +64,7 @@ def create_indices(self): def drop_indices(self): for i in range(0, self.nindices): - self.session.drop("index:collator:x" + str(i)) + self.dropUntilSuccess(self.session, "index:collator:x" + str(i)) def csv(self, s, i): return s.split(',')[i] diff --git a/test/suite/test_config08.py b/test/suite/test_config08.py deleted file mode 100644 index cbffae9bf2d..00000000000 --- a/test/suite/test_config08.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-present MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# test_config08.py -# Test the configuration that enables/disables dirty table flushing. -# - -import wiredtiger, wttest -from wtscenario import make_scenarios - -class test_config08(wttest.WiredTigerTestCase): - - logging = [ - ('log_off', dict(logging='false')), - ('log_on', dict(logging='true')), - ] - flush = [ - ('flush_off', dict(flush='false')), - ('flush_on', dict(flush='true')), - ] - - scenarios = make_scenarios(logging, flush) - - # This test varies the logging and file flush settings and therefore needs to set up its own - # connection config. Override the standard method. - def conn_config(self): - return 'create,log=(enabled={}),file_close_sync={}'.\ - format(self.logging,self.flush) - - def test_config08(self): - # Create a table with logging setting matching the connection level config. - table_params = 'key_format=i,value_format=S,log=(enabled={})'.format(self.logging) - self.uri = 'table:config_test' - - # Create a table with some data. - self.session.create(self.uri, table_params) - c = self.session.open_cursor(self.uri, None) - c[0] = 'ABC' * 4096 - c[1] = 'DEF' * 4096 - c[2] = 'GHI' * 4096 - c.close() - - # API calls that require exclusive file handles should return EBUSY if file_close_sync is - # set to false and logging is disabled. - if self.logging == 'false' and self.flush == 'false': - # WT won't allow this operation as exclsuive file handle is not possible - # with modified table. - self.assertTrue(self.raisesBusy(lambda: self.session.verify(self.uri, None)), - "was expecting API call to fail with EBUSY") - - # Taking a checkopoint should make WT happy. - self.session.checkpoint() - self.session.verify(self.uri, None) - else: - # All other combinations of configs should not return EBUSY. - self.session.verify(self.uri, None) - - # This will catch a bug if we return EBUSY from final shutdown. - self.conn.close() - -if __name__ == '__main__': - wttest.run() diff --git a/test/suite/test_cursor13.py b/test/suite/test_cursor13.py index 7651af1808c..b3e3a5175df 100755 --- a/test/suite/test_cursor13.py +++ b/test/suite/test_cursor13.py @@ -290,7 +290,7 @@ def test_verify(self): ds.check() c.close() s2 = self.conn.open_session() - s2.verify(self.uri) + self.verifyUntilSuccess(s2, self.uri) s2.close() class test_cursor13_drops(test_cursor13_base): @@ -302,7 +302,7 @@ def open_and_drop(self, uri, cursor_session, drop_session, nopens, ntrials): c.close() # The cursor cache is unaffected by the drop, and nothing # in the cache should prevent the drop from occurring. - drop_session.drop(uri) + self.dropUntilSuccess(drop_session, uri) confirm_does_not_exist(self, uri) def test_open_and_drop(self): @@ -332,7 +332,7 @@ def test_open_index_and_drop(self): self.assertRaises(wiredtiger.WiredTigerError, lambda: session.drop(uri)) c.close() - session.drop(uri) + self.dropUntilSuccess(session, uri) confirm_does_not_exist(self, uri) # Same test for indices, but with cursor held by another session. @@ -346,7 +346,7 @@ def test_open_index_and_drop(self): self.assertRaises(wiredtiger.WiredTigerError, lambda: session.drop(uri)) c.close() - session.drop(uri) + self.dropUntilSuccess(session, uri) confirm_does_not_exist(self, uri) session2.close() @@ -358,7 +358,7 @@ def test_cursor_drops(self): for i in range(0, 2): session.create(uri, config) - session.drop(uri) + self.dropUntilSuccess(session, uri) for i in range(0, 2): session.create(uri, config) @@ -367,7 +367,7 @@ def test_cursor_drops(self): self.assertRaises(wiredtiger.WiredTigerError, lambda: session.drop(uri)) cursor.close() - session.drop(uri) + self.dropUntilSuccess(session, uri) for i in range(0, 2): session.create(uri, config) @@ -377,7 +377,7 @@ def test_cursor_drops(self): self.assertRaises(wiredtiger.WiredTigerError, lambda: session.drop(uri)) cursor.close() - session.drop(uri) + self.dropUntilSuccess(session, uri) for i in range(0, 2): session.create(uri, config) @@ -389,7 +389,7 @@ def test_cursor_drops(self): self.assertRaises(wiredtiger.WiredTigerError, lambda: session.drop(uri)) cursor.close() - session.drop(uri) + self.dropUntilSuccess(session, uri) # Shared base class for some bigger tests. class test_cursor13_big_base(test_cursor13_base): diff --git a/test/suite/test_drop.py b/test/suite/test_drop.py index 97ab51d0ad7..e9f0aaea4ea 100644 --- a/test/suite/test_drop.py +++ b/test/suite/test_drop.py @@ -63,7 +63,7 @@ def drop(self, dataset, with_cursor, reopen, drop_index): drop_uri = ds.index_name(0) else: drop_uri = uri - self.session.drop(drop_uri, None) + self.dropUntilSuccess(self.session, drop_uri) confirm_does_not_exist(self, drop_uri) # Test drop of an object. diff --git a/test/suite/test_dupc.py b/test/suite/test_dupc.py index da93a7be298..890af88c63c 100644 --- a/test/suite/test_dupc.py +++ b/test/suite/test_dupc.py @@ -76,7 +76,7 @@ def test_duplicate_cursor(self): key_format=self.keyfmt, value_format=self.valfmt) ds.populate() self.iterate(uri, ds) - self.session.drop(uri, None) + self.dropUntilSuccess(self.session, uri) # A complex, multi-file table object. if self.uri == "table:" and self.valfmt != '8t': @@ -84,7 +84,7 @@ def test_duplicate_cursor(self): key_format=self.keyfmt, value_format=self.valfmt) ds.populate() self.iterate(uri, ds) - self.session.drop(uri, None) + self.dropUntilSuccess(self.session, uri) if __name__ == '__main__': wttest.run() diff --git a/test/suite/test_durability01.py b/test/suite/test_durability01.py index c9226001040..c2210576eae 100644 --- a/test/suite/test_durability01.py +++ b/test/suite/test_durability01.py @@ -70,7 +70,7 @@ def test_durability(self): if i % 5 == 0: self.session.checkpoint() else: - self.session.verify(self.uri) + self.verifyUntilSuccess(self.session, self.uri) self.check_crash_restart(".", "RESTART") if __name__ == '__main__': diff --git a/test/suite/test_import01.py b/test/suite/test_import01.py index aebcaacef5c..79a6e45a8e2 100644 --- a/test/suite/test_import01.py +++ b/test/suite/test_import01.py @@ -169,7 +169,7 @@ def test_file_import(self): self.session.create(self.uri, import_config) # Verify object. - self.session.verify(self.uri) + self.verifyUntilSuccess(self.session, self.uri, None) # Check that the previously inserted values survived the import. self.check(self.uri, self.keys[:max_idx], self.values[:max_idx]) @@ -232,7 +232,7 @@ def test_file_import_dropped_file(self): self.session.create(self.uri, import_config) # Verify object. - self.session.verify(self.uri) + self.verifyUntilSuccess(self.session, self.uri, None) # Check that the previously inserted values survived the import. self.check(self.uri, self.keys, self.values) diff --git a/test/suite/test_import03.py b/test/suite/test_import03.py index 564db864558..73e0d67debb 100644 --- a/test/suite/test_import03.py +++ b/test/suite/test_import03.py @@ -128,7 +128,7 @@ def test_table_import(self): self.session.create(uri, import_config) # Verify object. - self.session.verify(uri) + self.verifyUntilSuccess(self.session, uri, None) # Check that the previously inserted values survived the import. self.check(uri, keys[:max_idx], values[:max_idx]) diff --git a/test/suite/test_import04.py b/test/suite/test_import04.py index 23f1a1ab379..f2ff1626f28 100644 --- a/test/suite/test_import04.py +++ b/test/suite/test_import04.py @@ -191,7 +191,7 @@ def test_table_import(self): self.session.create(uri, import_config) # Verify object. - self.session.verify(uri) + self.verifyUntilSuccess(self.session, uri, None) # Check that the previously inserted values survived the import. self.check(uri, keys[:max_idx], values[:max_idx]) diff --git a/test/suite/test_import06.py b/test/suite/test_import06.py index 0046f8e0c8c..f56c7a218d0 100644 --- a/test/suite/test_import06.py +++ b/test/suite/test_import06.py @@ -143,7 +143,7 @@ def test_import_repair(self): self.session.create(self.uri, import_config) # Verify object. - self.session.verify(self.uri) + self.verifyUntilSuccess(self.session, self.uri, None) # Check that the previously inserted values survived the import. self.check(self.uri, self.keys[:max_idx], self.values[:max_idx]) diff --git a/test/suite/test_import08.py b/test/suite/test_import08.py index c6eb6a7da4a..bfa06bfc184 100644 --- a/test/suite/test_import08.py +++ b/test/suite/test_import08.py @@ -142,7 +142,7 @@ def test_import_write_gen(self): self.session.create(self.uri, import_config) # Verify object. - self.session.verify(self.uri) + self.verifyUntilSuccess(self.session, self.uri, None) # Check the write generation of the new table. # diff --git a/test/suite/test_import09.py b/test/suite/test_import09.py index 603708aada8..e5ba1be8e0f 100644 --- a/test/suite/test_import09.py +++ b/test/suite/test_import09.py @@ -169,7 +169,7 @@ def test_import_table_repair(self): self.session.create(uri, import_config) # Verify object. - self.session.verify(uri) + self.verifyUntilSuccess(self.session, uri, None) # Check that the previously inserted values survived the import. self.check(uri, keys[:max_idx], values[:max_idx]) diff --git a/test/suite/test_import10.py b/test/suite/test_import10.py index 7715a2754ad..1b46d989f85 100644 --- a/test/suite/test_import10.py +++ b/test/suite/test_import10.py @@ -78,7 +78,7 @@ def test_import_with_open_backup_cursor(self): self.session.create(table_uri, import_config) # Verify object. - self.session.verify(table_uri) + self.verifyUntilSuccess(self.session, table_uri) # Check that the data got imported correctly. cursor = self.session.open_cursor(table_uri) diff --git a/test/suite/test_index01.py b/test/suite/test_index01.py index 037e623b881..9d3b1f63338 100755 --- a/test/suite/test_index01.py +++ b/test/suite/test_index01.py @@ -52,7 +52,7 @@ def create_table(self): def drop_table(self): self.pr('drop table') - self.session.drop(self.tablename, None) + self.dropUntilSuccess(self.session, self.tablename) def cursor(self, config=None): self.pr('open cursor') diff --git a/test/suite/test_index03.py b/test/suite/test_index03.py index c619a7fa6f6..7cdf41b33bf 100644 --- a/test/suite/test_index03.py +++ b/test/suite/test_index03.py @@ -65,7 +65,7 @@ def test_index_create(self): self.assertRaises(wiredtiger.WiredTigerError, lambda: session.drop(index2_uri)) c1.close() - session.drop(index2_uri) + self.dropUntilSuccess(session, index2_uri) if __name__ == '__main__': wttest.run() diff --git a/test/suite/test_join01.py b/test/suite/test_join01.py index 1449b679247..cabb8a23f1a 100644 --- a/test/suite/test_join01.py +++ b/test/suite/test_join01.py @@ -335,7 +335,7 @@ def test_join(self): nest2.close() for c in closeme: c.close() - self.session.drop('table:join01') + self.dropUntilSuccess(self.session, 'table:join01') if __name__ == '__main__': wttest.run() diff --git a/test/suite/test_join03.py b/test/suite/test_join03.py index 0eff3f26dba..9d96306c39a 100644 --- a/test/suite/test_join03.py +++ b/test/suite/test_join03.py @@ -118,7 +118,7 @@ def join(self, csvformat, args0, args1): c1a.close() c1b.close() c0.close() - self.session.drop('table:join03') + self.dropUntilSuccess(self.session, 'table:join03') # Test joins using CSV fields that are interpreted as different types # to make sure all the extractor plumbing used in joins is working. diff --git a/test/suite/test_join06.py b/test/suite/test_join06.py index e6e5dd81506..811079ecfd8 100644 --- a/test/suite/test_join06.py +++ b/test/suite/test_join06.py @@ -152,7 +152,7 @@ def test_join(self): c0.close() if self.isolation != '': self.session.commit_transaction() - self.session.drop('table:join06') + self.dropUntilSuccess(self.session, 'table:join06') if __name__ == '__main__': wttest.run() diff --git a/test/suite/test_join07.py b/test/suite/test_join07.py index b51113dfe34..5eff0250d26 100644 --- a/test/suite/test_join07.py +++ b/test/suite/test_join07.py @@ -496,7 +496,7 @@ def interpret(self, s): self.iterate(jc, mbr) self.close_cursors(jc) - self.session.drop('table:join07') + self.dropUntilSuccess(self.session, 'table:join07') def test_join_string(self): self.interpret("[N=1000][key=r] 7 < A <= 500 && B < 150 && C > 17") diff --git a/test/suite/test_join09.py b/test/suite/test_join09.py index 2fedd0c99ed..9405acba1d7 100644 --- a/test/suite/test_join09.py +++ b/test/suite/test_join09.py @@ -108,7 +108,7 @@ def test_join(self): jc.close() c1.close() c0.close() - self.session.drop('table:join09') + self.dropUntilSuccess(self.session, 'table:join09') if __name__ == '__main__': wttest.run() diff --git a/test/suite/test_lsm02.py b/test/suite/test_lsm02.py index 6a9b62331f2..deaa94e3038 100755 --- a/test/suite/test_lsm02.py +++ b/test/suite/test_lsm02.py @@ -62,13 +62,13 @@ def test_lsm_tombstone(self): def test_lsm_rename01(self): self.session.create(self.uri, 'key_format=S,value_format=S') self.add_key(self.uri, 'a', 'a') - self.session.rename(self.uri, self.uri + 'renamed', None) + self.renameUntilSuccess(self.session, self.uri, self.uri + 'renamed') self.verify_key_exists(self.uri + 'renamed', 'a', 'a') def test_lsm_rename02(self): self.session.create(self.uri, 'key_format=S,value_format=S') self.add_key(self.uri, 'a', 'a') - self.session.rename(self.uri, self.uri + 'renamed', None) + self.renameUntilSuccess(self.session, self.uri, self.uri + 'renamed') # Create a new LSM with the original name self.session.create(self.uri, 'key_format=S,value_format=S') diff --git a/test/suite/test_lsm03.py b/test/suite/test_lsm03.py index c7cc20255b1..6994b557da7 100644 --- a/test/suite/test_lsm03.py +++ b/test/suite/test_lsm03.py @@ -59,4 +59,4 @@ def test_lsm_drop_active(self): ds = SimpleDataSet(self, uri, 50000, config=self.config) ds.populate() # The drop should succeed even when LSM work units are active - self.session.drop(uri) + self.dropUntilSuccess(self.session, uri) diff --git a/test/suite/test_rename.py b/test/suite/test_rename.py index d46280108f9..3ec8ef3bdbc 100644 --- a/test/suite/test_rename.py +++ b/test/suite/test_rename.py @@ -59,11 +59,11 @@ def rename(self, dataset, with_cursor): lambda: self.session.rename(uri1, uri2, None)) cursor.close() - self.session.rename(uri1, uri2, None) + self.renameUntilSuccess(self.session, uri1, uri2) confirm_does_not_exist(self, uri1) ds2.check() - self.session.rename(uri2, uri1, None) + self.renameUntilSuccess(self.session, uri2, uri1) confirm_does_not_exist(self, uri2) ds1.check() diff --git a/test/suite/test_salvage.py b/test/suite/test_salvage.py index fc035cf1d2f..49f8462362f 100755 --- a/test/suite/test_salvage.py +++ b/test/suite/test_salvage.py @@ -261,7 +261,7 @@ def test_salvage_api(self): self.moreinit() self.session.create('table:' + self.tablename, self.session_params) self.populate(self.tablename) - self.session.salvage('file:' + self.tablename + ".wt", None) + self.salvageUntilSuccess(self.session, 'file:' + self.tablename + ".wt") self.check_populate(self.tablename) def test_salvage_api_damaged(self): diff --git a/test/suite/test_schema01.py b/test/suite/test_schema01.py index 5e5496f6fb0..d4b8f9f5799 100644 --- a/test/suite/test_schema01.py +++ b/test/suite/test_schema01.py @@ -66,7 +66,7 @@ def create_table(self): def drop_table(self): self.pr('drop table') - self.session.drop(self.tablename) + self.dropUntilSuccess(self.session, self.tablename) def cursor(self, config=None): self.pr('open cursor') diff --git a/test/suite/test_schema05.py b/test/suite/test_schema05.py index 470c1b42fbd..c99f65dad4a 100644 --- a/test/suite/test_schema05.py +++ b/test/suite/test_schema05.py @@ -71,7 +71,7 @@ def create_indices(self): def drop_indices(self): for i in range(0, self.nindices): - self.session.drop("index:schema05:x" + str(i)) + self.dropUntilSuccess(self.session, "index:schema05:x" + str(i)) def csv(self, s, i): return s.split(',')[i] diff --git a/test/suite/test_schema06.py b/test/suite/test_schema06.py index 6224b409b33..f23d6de834f 100644 --- a/test/suite/test_schema06.py +++ b/test/suite/test_schema06.py @@ -64,7 +64,7 @@ def create_index(self, inum): def drop_index(self, inum): colname = "s" + str(inum) - self.session.drop("index:schema06:" + colname, None) + self.dropUntilSuccess(self.session, "index:schema06:" + colname) def test_index_stress(self): self.session.create("table:schema06", diff --git a/test/suite/test_schema07.py b/test/suite/test_schema07.py index 386552bb6aa..bf01a1b7548 100644 --- a/test/suite/test_schema07.py +++ b/test/suite/test_schema07.py @@ -47,7 +47,7 @@ def test_many_tables(self): # This will block if the metadata fills the cache c["key"] = "value" c.close() - self.session.drop(uri) + self.dropUntilSuccess(self.session, uri) if __name__ == '__main__': wttest.run() diff --git a/test/suite/test_sweep03.py b/test/suite/test_sweep03.py index 265f8c44dce..92ff2b59b78 100644 --- a/test/suite/test_sweep03.py +++ b/test/suite/test_sweep03.py @@ -153,7 +153,7 @@ def test_disable_idle_timeout_drop(self): sweep_baseline = stat_cursor[stat.conn.dh_sweeps][2] stat_cursor.close() - self.session.drop(drop_uri, None) + self.dropUntilSuccess(self.session, drop_uri) sweep_baseline = self.wait_for_sweep(sweep_baseline) diff --git a/test/suite/test_truncate01.py b/test/suite/test_truncate01.py index 39bffc5f163..3e99611a549 100644 --- a/test/suite/test_truncate01.py +++ b/test/suite/test_truncate01.py @@ -96,13 +96,13 @@ def test_truncate_uri(self): SimpleDataSet(self, uri, 100).populate() self.session.truncate(uri, None, None, None) confirm_empty(self, uri) - self.session.drop(uri, None) + self.dropUntilSuccess(self.session, uri) if self.type == "table:": ComplexDataSet(self, uri, 100).populate() self.session.truncate(uri, None, None, None) confirm_empty(self, uri) - self.session.drop(uri, None) + self.dropUntilSuccess(self.session, uri) # Test truncation of cursors in an illegal order. class test_truncate_cursor_order(wttest.WiredTigerTestCase): @@ -165,7 +165,7 @@ def test_truncate_cursor_order(self): self.session.truncate(None, c1, c2, None) self.assertEqual(c1.close(), 0) self.assertEqual(c2.close(), 0) - self.session.drop(uri) + self.dropUntilSuccess(self.session, uri) if self.type == "table:": ds = ComplexDataSet(self, uri, 100, key_format=self.keyfmt) @@ -177,7 +177,7 @@ def test_truncate_cursor_order(self): self.session.truncate(None, c1, c2, None) self.assertEqual(c1.close(), 0) self.assertEqual(c2.close(), 0) - self.session.drop(uri) + self.dropUntilSuccess(self.session, uri) # Test truncation of empty objects. class test_truncate_empty(wttest.WiredTigerTestCase): @@ -428,7 +428,7 @@ def test_truncate_simple(self): cursor.close() self.truncateRangeAndCheck(ds, uri, begin, end, expected) - self.session.drop(uri, None) + self.dropUntilSuccess(self.session, uri) # Test truncation of complex tables using cursors. We can't do the kind of # layout and detailed testing as we can with files, but this will at least @@ -487,7 +487,7 @@ def test_truncate_complex(self): self.reopen_conn() self.truncateRangeAndCheck(ds, uri, begin, end, expected) - self.session.drop(uri, None) + self.dropUntilSuccess(self.session, uri) if __name__ == '__main__': wttest.run() diff --git a/test/suite/test_upgrade.py b/test/suite/test_upgrade.py index 02691aa0c12..363f305c627 100644 --- a/test/suite/test_upgrade.py +++ b/test/suite/test_upgrade.py @@ -52,8 +52,8 @@ def upgrade(self, dataset, with_cursor): lambda: self.session.drop(uri, None)) cursor.close() - self.session.upgrade(uri, None) - self.session.drop(uri) + self.upgradeUntilSuccess(self.session, uri) + self.dropUntilSuccess(self.session, uri) # Test upgrade of an object. def test_upgrade(self): diff --git a/test/suite/test_util11.py b/test/suite/test_util11.py index 024813e20ec..fd4e359e279 100644 --- a/test/suite/test_util11.py +++ b/test/suite/test_util11.py @@ -95,8 +95,8 @@ def test_list_drop(self): self.session.create('table:' + pfx + '4', params) self.populate(pfx + '2') self.populate(pfx + '3') - self.session.drop('table:' + pfx + '2', None) - self.session.drop('table:' + pfx + '4', None) + self.dropUntilSuccess(self.session, 'table:' + pfx + '2') + self.dropUntilSuccess(self.session, 'table:' + pfx + '4') # Construct what we think we'll find tablelist = ''.join('table:' + pfx + str(i) + '\n' for i in (1, 3, 5)) @@ -119,11 +119,11 @@ def test_list_drop_all(self): self.session.create('table:' + pfx + '4', params) self.populate(pfx + '2') self.populate(pfx + '3') - self.session.drop('table:' + pfx + '5', None) - self.session.drop('table:' + pfx + '4', None) - self.session.drop('table:' + pfx + '3', None) - self.session.drop('table:' + pfx + '2', None) - self.session.drop('table:' + pfx + '1', None) + self.dropUntilSuccess(self.session, 'table:' + pfx + '5') + self.dropUntilSuccess(self.session, 'table:' + pfx + '4') + self.dropUntilSuccess(self.session, 'table:' + pfx + '3') + self.dropUntilSuccess(self.session, 'table:' + pfx + '2') + self.dropUntilSuccess(self.session, 'table:' + pfx + '1') # Construct what we think we'll find filelist = '' diff --git a/test/suite/test_verify.py b/test/suite/test_verify.py index c135362cb58..af0ae6e5f11 100755 --- a/test/suite/test_verify.py +++ b/test/suite/test_verify.py @@ -117,7 +117,7 @@ def test_verify_api(self): params = 'key_format=S,value_format=S' self.session.create('table:' + self.tablename, params) self.populate(self.tablename) - self.session.verify('table:' + self.tablename, None) + self.verifyUntilSuccess(self.session, 'table:' + self.tablename) self.check_populate(self.tablename) def test_verify_api_75pct_null(self): diff --git a/test/suite/wttest.py b/test/suite/wttest.py index 60064c130f3..0eb9dfa5612 100755 --- a/test/suite/wttest.py +++ b/test/suite/wttest.py @@ -662,6 +662,56 @@ def assertTimestampsEqual(self, ts1, ts2): def timestamp_str(self, t): return '%x' % t + def dropUntilSuccess(self, session, uri, config=None): + while True: + try: + session.drop(uri, config) + return + except wiredtiger.WiredTigerError as err: + if str(err) != os.strerror(errno.EBUSY): + raise err + session.checkpoint() + + def verifyUntilSuccess(self, session, uri, config=None): + while True: + try: + session.verify(uri, config) + return + except wiredtiger.WiredTigerError as err: + if str(err) != os.strerror(errno.EBUSY): + raise err + session.checkpoint() + + def renameUntilSuccess(self, session, uri, newUri, config=None): + while True: + try: + session.rename(uri, newUri, config) + return + except wiredtiger.WiredTigerError as err: + if str(err) != os.strerror(errno.EBUSY): + raise err + session.checkpoint() + + def upgradeUntilSuccess(self, session, uri, config=None): + while True: + try: + session.upgrade(uri, config) + return + except wiredtiger.WiredTigerError as err: + if str(err) != os.strerror(errno.EBUSY): + raise err + session.checkpoint() + + def salvageUntilSuccess(self, session, uri, config=None): + while True: + try: + session.salvage(uri, config) + return + except wiredtiger.WiredTigerError as err: + if str(err) != os.strerror(errno.EBUSY): + raise err + session.checkpoint() + def exceptionToStderr(self, expr): """ Used by assertRaisesHavingMessage to convert an expression diff --git a/test/thread/file.c b/test/thread/file.c index 5717e72c63e..104e0cec770 100644 --- a/test/thread/file.c +++ b/test/thread/file.c @@ -105,11 +105,15 @@ load(const char *name) void verify(const char *name) { + WT_DECL_RET; WT_SESSION *session; testutil_check(conn->open_session(conn, NULL, NULL, &session)); - testutil_check(session->verify(session, name, NULL)); + while ((ret = session->verify(session, name, NULL)) == EBUSY) + testutil_check(session->checkpoint(session, NULL)); + + testutil_check(ret); testutil_check(session->close(session, NULL)); } diff --git a/test/utility/test_util.h b/test/utility/test_util.h index 441634cfa90..0f12e5687af 100644 --- a/test/utility/test_util.h +++ b/test/utility/test_util.h @@ -151,6 +151,30 @@ typedef struct { __r, "%s/%d: %s: " fmt, __PRETTY_FUNCTION__, __LINE__, #call, __VA_ARGS__); \ } while (0) +/* + * testutil_drop -- + * Drop a table + */ +#define testutil_drop(session, uri, config) \ + do { \ + int __ret; \ + while ((__ret = session->drop(session, uri, config)) == EBUSY) \ + testutil_check(session->checkpoint(session, NULL)); \ + testutil_check(__ret); \ + } while (0) + +/* + * testutil_verify -- + * Verify a table + */ +#define testutil_verify(session, uri, config) \ + do { \ + int __ret; \ + while ((__ret = session->verify(session, uri, config)) == EBUSY) \ + testutil_check(session->checkpoint(session, NULL)); \ + testutil_check(__ret); \ + } while (0) + /* * error_sys_check -- * Complain and quit if a function call fails. A special name because it appears in the From 2065eff8761708331ec8d0b39c7e1779ac9c1b7a Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Thu, 24 Feb 2022 07:35:57 -0500 Subject: [PATCH 17/20] Initial merge with the WT-8170 change was no good; try again. --- src/include/extern.h | 2 ++ src/txn/txn.c | 2 ++ src/txn/txn_timestamp.c | 54 +++++++++++++++++++++++++++++++++++------ 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index 3cf657c87f9..53c771ab4a0 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1533,6 +1533,8 @@ extern int __wt_turtle_validate_version(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_activity_drain(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_check_durable_timestamp(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_checkpoint_log(WT_SESSION_IMPL *session, bool full, uint32_t flags, diff --git a/src/txn/txn.c b/src/txn/txn.c index c98ff57f8b5..3863574c739 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -1491,6 +1491,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) "durable_timestamp should not be specified for non-prepared transaction"); } + WT_ERR(__wt_txn_check_durable_timestamp(session)); + /* * Release our snapshot in case it is keeping data pinned (this is particularly important for * checkpoints). Before releasing our snapshot, copy values into any positioned cursors so they diff --git a/src/txn/txn_timestamp.c b/src/txn/txn_timestamp.c index 9ea4f24fdb8..a5abefc9ae8 100644 --- a/src/txn/txn_timestamp.c +++ b/src/txn/txn_timestamp.c @@ -519,6 +519,52 @@ __txn_assert_after_reads(WT_SESSION_IMPL *session, const char *op, wt_timestamp_ return (0); } +/* + * __wt_txn_check_durable_timestamp -- + * Check that a transaction's effective durable timestamp is not before the durable timestamp of + * anything it read. That means the durable timestamp for prepared transactions and the commit + * timestamp otherwise. If committing without a timestamp, check that everything read is stable. + * This prevents consistency problems when one transaction reads another's committed values before + * they are stable and then commits and becomes stable first. See WT-8747. + */ +int +__wt_txn_check_durable_timestamp(WT_SESSION_IMPL *session) +{ + WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + wt_timestamp_t durable_ts; + char ts_string[2][WT_TS_INT_STRING_SIZE]; + + txn = session->txn; + txn_global = &S2C(session)->txn_global; + + /* Transactions that didn't write get a free pass. */ + if (txn->mod_count == 0) + return (0); + + if (F_ISSET(txn, WT_TXN_HAS_TS_DURABLE)) + durable_ts = txn->durable_timestamp; + else if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) + durable_ts = txn->commit_timestamp; + else + durable_ts = WT_TS_NONE; + + if (durable_ts == WT_TS_NONE) { + /* Don't waste cache cycles reading stable unless there's something to check. */ + if (txn->max_durable_timestamp_read != WT_TS_NONE && + txn->max_durable_timestamp_read > txn_global->stable_timestamp) + WT_RET_MSG(session, EINVAL, + "commit without timestamp applies before the durable timestamp %s of a value it read", + __wt_timestamp_to_string(txn->max_durable_timestamp_read, ts_string[1])); + } else if (durable_ts < txn->max_durable_timestamp_read) + WT_RET_MSG(session, EINVAL, + "durable timestamp %s is before the durable timestamp %s of a value it read", + __wt_timestamp_to_string(durable_ts, ts_string[0]), + __wt_timestamp_to_string(txn->max_durable_timestamp_read, ts_string[1])); + + return (0); +} + /* * __wt_txn_set_commit_timestamp -- * Validate the commit timestamp of a transaction. If the commit timestamp is less than the @@ -680,14 +726,6 @@ __wt_txn_set_durable_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t durable_ __wt_timestamp_to_string(durable_ts, ts_string[0]), __wt_timestamp_to_string(txn->commit_timestamp, ts_string[1])); - /* Transactions that have made writes must not commit into the past of things they read. */ - if (txn->mod_count > 0 && durable_ts != WT_TS_NONE && - durable_ts < txn->max_durable_timestamp_read) - WT_RET_MSG(session, EINVAL, - "durable timestamp %s is before the durable timestamp %s of a value it read", - __wt_timestamp_to_string(durable_ts, ts_string[0]), - __wt_timestamp_to_string(txn->max_durable_timestamp_read, ts_string[1])); - txn->durable_timestamp = durable_ts; F_SET(txn, WT_TXN_HAS_TS_DURABLE); From 64d6d3c12cc46122eb83ba2c3eb19e329916f54e Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Thu, 24 Feb 2022 07:46:51 -0500 Subject: [PATCH 18/20] Rewrite the new durable_ts04 for this approach. --- test/suite/test_durable_ts04.py | 259 ++++++++++++++++++++------------ 1 file changed, 164 insertions(+), 95 deletions(-) diff --git a/test/suite/test_durable_ts04.py b/test/suite/test_durable_ts04.py index 66149b53848..666dfa90f13 100644 --- a/test/suite/test_durable_ts04.py +++ b/test/suite/test_durable_ts04.py @@ -29,7 +29,7 @@ from helper import simulate_crash_restart import wiredtiger, wttest from wiredtiger import WT_NOTFOUND -from wtscenario import make_scenarios +from wtscenario import filter_scenarios, make_scenarios # test_durable_ts04.py # @@ -39,20 +39,52 @@ # before it, which in turn leads to inconsistency because a crash will preserve the # second transaction but not the first. # -# To avoid this problem, we prohibit reads between the commit time and the durable -# time, until the global timestamp reaches the durable time. +# To avoid this problem, we track the durable timestamp of everything a transaction +# reads, and reject attempts to commit with durable timestamp (or commit timestamp +# if not prepared) less than that. # -# This test makes sure that mechanism works as intended. -# 1. Reading nondurable data produces WT_PREPARE_CONFLICT. -# 2. Advancing stable to the durable timestamp eliminates this behavior. -# 3. The behavior applies to both updates and removes. -# 4. The behavior applies to pages that have been evicted as well as to in-memory updates. -# 5. Rollback-to-stable can successfully roll back the transaction before it becomes stable. -# 6. ignore_prepare=true disables the check. +# This test makes sure that mechanism works as intended, by setting up a transaction +# with nondurable data and then handling it in a second transaction. # -# Additionally there are two ways to set up the scenario; one involves committing -# before stable. (This is explicitly permitted if stable has moved up since prepare, as -# long as the prepare happend after stable.) +# In the first transaction, +# - The operation may be update or remove; durable timestamps on tombstones also matter. +# - There are two ways to set up the scenario; one involves committing before stable by +# preparing and moving stable up, which is explicitly allowed. These scenarios are not +# really different, but both should get checked. +# - After commiting we can evict and/or checkpoint; on-disk durable timestamps also matter, +# including durable stop times for deleted values. +# - It isn't clear that both evict and checkpoint is useful; might make sense to remove +# that combination if looking to prune the number of scenarios. +# +# In the second transaction: +# - Reading nondurable data is always permitted. +# - The operation can be read, remove, update, insert, or truncate; the write operations +# can also read (which should trigger the restriction) or not ("blind write"), which +# should not. +# - The read can have a timestamp that is in the nondurable range, or no timestamp, which +# also produces a nondurable read because stable hasn't been moved up. +# - The commit can be either plain (no timestamp), timestamped, or prepared. +# - We can try to hit the restriction (expect failure) or not: +# - a plain commit will fail if the durable timestamp of something it read is beyond +# stable as of when it commits, and won't if everything it read is stable. +# - a timestamped commit will fail if its durable timestamp is before the durable +# timestamp of something it read, and won't if its durable timestamp is the same +# or later. +# +# Note that we filter out the case where both the first transaction and the second +# transaction do removes (including truncates) because this doesn't work -- as of this +# writing the second remove is ignored, but this is expected to change so it fails, and +# either way nothing interesting happens. +# +# We also filter out the expected failure scenario for preparing the second transaction +# because that causes a panic. Fortunately the protection scheme can be tested with an +# ordinary timestamped commit. (The prepared scenario does work correctly as of when it +# was written, but because it can't be enabled by default it doesn't help much in +# production.) It would be nice to have a debug switch to turn that panic into a Python +# exception. +# +# We do not for the moment try to create multiple unstable values in history at once. +# That should probably be tested sometime. class test_durable_ts04(wttest.WiredTigerTestCase): conn_config = 'cache_size=10MB' @@ -62,13 +94,14 @@ class test_durable_ts04(wttest.WiredTigerTestCase): ('column', dict(key_format='r', value_format='S')), ('column-fix', dict(key_format='r', value_format='8t')), ] - commit_values = [ - ('commit_before_stable', dict(commit_before_stable=True)), - ('commit_after_stable', dict(commit_before_stable=False)), + # Options for the setup. + first_remove_values = [ + ('update', dict(first_remove=False)), + ('remove', dict(first_remove=True)), ] - remove_values = [ - ('update', dict(do_remove=False)), - ('remove', dict(do_remove=True)), + first_commit_values = [ + ('commit_before_stable', dict(first_commit_before_stable=True)), + ('commit_after_stable', dict(first_commit_before_stable=False)), ] evict_values = [ ('noevict', dict(do_evict=False)), @@ -78,44 +111,54 @@ class test_durable_ts04(wttest.WiredTigerTestCase): ('nocheckpoint', dict(do_checkpoint=False)), ('checkpoint', dict(do_checkpoint=True)), ] - op_values = [ - ('crash', dict(op='crash')), - ('rollback', dict(op='rollback')), - ('stabilize', dict(op='stabilize')), + # Options for the subsequent transaction. + second_readts_values = [ + ('readts', dict(second_read_ts=True)), + ('noreadts', dict(second_read_ts=False)), ] - ignoreprepare_values = [ - ('no_ignore_prepare', dict(ignore_prepare=False)), - ('ignore_prepare', dict(ignore_prepare=True)), + second_op_values = [ + ('read', dict(second_op='nop', second_read=True, op_xfail=False)), + ('remove', dict(second_op='remove', second_read=False, op_xfail=False)), + ('read_remove', dict(second_op='remove', second_read=True, op_xfail=True)), + ('update', dict(second_op='update', second_read=False, op_xfail=False)), + ('read_update', dict(second_op='update', second_read=True, op_xfail=True)), + ('insert', dict(second_op='insert', second_read=False, op_xfail=False)), + ('read_insert', dict(second_op='insert', second_read=True, op_xfail=True)), + # For the moment at least, truncate is a read-modify-write and will fail here even + # without an explicit read. + ('truncate', dict(second_op='truncate', second_read=False, op_xfail=True)), + ('read_truncate', dict(second_op='truncate', second_read=True, op_xfail=True)), + ] + second_commit_values = [ + ('commit-plain', dict(second_commit='plain')), + ('commit-ts', dict(second_commit='ts')), + ('commit-prepare', dict(second_commit='prepare')), + ] + failure_values = [ + ('bad', dict(commit_xfail=True)), + ('ok', dict(commit_xfail=False)), ] - scenarios = make_scenarios(format_values, - commit_values, remove_values, evict_values, checkpoint_values, op_values, - ignoreprepare_values) - - Deleted = 1234567 # Choose this to be different from any legal value. - - def check_range(self, uri, lo, hi, value, read_ts): - cursor = self.session.open_cursor(uri) - if value is None: - for i in range(lo, hi): - self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts)) - self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: cursor[i], '/committed but non-durable value/') - self.session.rollback_transaction() - else: - ign = ',ignore_prepare=true' if self.ignore_prepare else '' - self.session.begin_transaction('read_timestamp=' + self.timestamp_str(read_ts) + ign) - for i in range(lo, hi): - if value == self.Deleted: - cursor.set_key(i) - self.assertEqual(cursor.search(), WT_NOTFOUND) - else: - self.assertEqual(cursor[i], value) - self.session.rollback_transaction() - cursor.close() - def check(self, uri, nrows, low_value, high_value, read_ts): - self.check_range(uri, 1, nrows // 2, low_value, read_ts) - self.check_range(uri, nrows // 2, nrows + 1, high_value, read_ts) + def keep_scenario(name, vals): + # Doing remove in the first operation and remove (or truncate) in the second does not + # do anything interesting. + first_remove = vals['first_remove'] + second_op = vals['second_op'] + if first_remove and (second_op == 'remove' or 'second_op' == 'truncate'): + return False + # Cannot test the prepare.bad case because it panics. + # FUTURE: we should have a debug flag to turn the prepared failure panic into a + # Python exception for testing. + second_commit = vals['second_commit'] + commit_xfail = vals['commit_xfail'] + if second_commit == 'prepare' and commit_xfail: + return False + return True + scenarios = filter_scenarios( + make_scenarios(format_values, + first_commit_values, first_remove_values, evict_values, checkpoint_values, + second_readts_values, second_op_values, second_commit_values, failure_values), + keep_scenario) def evict(self, uri, lo, hi, value, read_ts): evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)") @@ -134,10 +177,12 @@ def test_durable_ts04(self): uri, 'key_format={},value_format={}'.format(self.key_format, self.value_format)) if self.value_format == '8t': value_a = 97 - value_b = 0 if self.do_remove else 98 + value_b = 0 if self.first_remove else 98 + value_c = 99 else: value_a = "aaaaa" * 100 - value_b = self.Deleted if self.do_remove else "bbbbb" * 100 + value_b = None if self.first_remove else "bbbbb" * 100 + value_c = "ccccc" * 100 cursor = self.session.open_cursor(uri) @@ -156,67 +201,91 @@ def test_durable_ts04(self): self.session.begin_transaction() for k in range(1, nrows // 2): cursor.set_key(k) - if self.do_remove: + if self.first_remove: self.assertEqual(cursor.remove(), 0) else: cursor.set_value(value_b) self.assertEqual(cursor.update(), 0) self.session.prepare_transaction('prepare_timestamp=' + self.timestamp_str(10)) - if self.commit_before_stable: + if self.first_commit_before_stable: self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(30)) self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20) + ',durable_timestamp=' + self.timestamp_str(50)) - if not self.commit_before_stable: + if not self.first_commit_before_stable: self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(30)) - cursor.close() + cursor.reset() # Optionally evict. Have the eviction cursor read at 10 to avoid touching the # nondurable data. if self.do_evict: self.evict(uri, 1, nrows, value_a, 10) - # Now try reading at 25 and 40. This should fail in all variants. - # (25 is after commit, before stable, and before durable; 40 is after commit - # and stable, and before durable.) - # If ignore_prepare is set, we should see the transaction anyway and get value_b. - self.check(uri, nrows, value_b if self.ignore_prepare else None, value_a, 25) - self.check(uri, nrows, value_b if self.ignore_prepare else None, value_a, 40) - - # We should be able to read at 50. - self.check(uri, nrows, value_b, value_a, 50) - # Optionally checkpoint. if self.do_checkpoint: self.session.checkpoint() - # Now either crash, roll back explicitly, or move stable forward. - if self.op == 'crash' or self.op == 'rollback': - if self.op == 'crash': - simulate_crash_restart(self, ".", "RESTART") + # Now do another transaction on top. Read at 40, in the nondurable range + # (that is, after stable, before the transaction's durable timestamp), or + # with no timestamp at all (which will read the latest data). We should be + # able to read the nondurable data freely, but doing so should constrain + # when we can commit. + + if self.second_read_ts: + self.session.begin_transaction('read_timestamp=' + self.timestamp_str(40)) + else: + self.session.begin_transaction() + + if self.second_read: + for k in range(1, nrows // 2): + if self.first_remove and self.value_format != '8t': + cursor.set_key(k) + self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND) + else: + self.assertEqual(cursor[k], value_b) + + if self.second_op == 'truncate': + lo = self.session.open_cursor(uri) + hi = self.session.open_cursor(uri) + lo.set_key(1) + hi.set_key(nrows // 2) + self.assertEqual(self.session.truncate(None, lo, hi, None), 0) + lo.close() + hi.close() + else: + for k in range(1, nrows // 2): + cursor.set_key(k) + if self.second_op == 'remove': + self.assertEqual(cursor.remove(), 0) + else: + cursor.set_value(value_c) + if self.second_op == 'update': + self.assertEqual(cursor.update(), 0) + elif self.second_op == 'insert': + self.assertEqual(cursor.insert(), 0) + + # Set the commit properties. + if self.second_commit == 'plain': + if not self.commit_xfail: + self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(60)) + else: + # Commit at either 45 or 60. 45 is after stable but before the durable timestamp we + # read and should fail. 60 is after and should succeed. Note that we could here also + # prepare before stable, move stable up, and commit before stable, but that doesn't + # seem useful as it's the handling of durable timestamps we're checking up on. + tsstr = self.timestamp_str(45 if self.commit_xfail else 60) + if self.second_commit == 'prepare': + self.session.prepare_transaction('prepare_timestamp=' + tsstr) + self.session.timestamp_transaction('commit_timestamp=' + tsstr) + self.session.timestamp_transaction('durable_timestamp=' + tsstr) else: - self.conn.rollback_to_stable() - - # The transaction should disappear, because it isn't stable yet, and it - # should do so without any noise or failures caused by the nondurable checks. - self.check(uri, nrows, value_a, value_a, 15) - self.check(uri, nrows, value_a, value_a, 25) - self.check(uri, nrows, value_a, value_a, 40) - self.check(uri, nrows, value_a, value_a, 50) + self.session.timestamp_transaction('commit_timestamp=' + tsstr) + + if self.op_xfail and self.commit_xfail: + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction(), '/before the durable timestamp/') else: - # First, move stable to 49 and check we still can't read the nondurable values. - self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(49)) - self.check(uri, nrows, value_b if self.ignore_prepare else None, value_a, 25) - self.check(uri, nrows, value_b if self.ignore_prepare else None, value_a, 40) - self.check(uri, nrows, value_b, value_a, 50) - - # Now, set it to 50 and we should be able to read the previously nondurable values. - self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(50)) - self.check(uri, nrows, value_a, value_a, 15) - self.check(uri, nrows, value_b, value_a, 25) - self.check(uri, nrows, value_b, value_a, 30) - self.check(uri, nrows, value_b, value_a, 40) - self.check(uri, nrows, value_b, value_a, 50) + self.session.commit_transaction() if __name__ == '__main__': wttest.run() From c4c3e7509593e46c3e842d10ca2427fe12e16c12 Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Thu, 24 Feb 2022 19:03:41 -0500 Subject: [PATCH 19/20] Adjust durable_ts04 to avoid tripping on an unrelated issue. --- test/suite/test_durable_ts04.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/suite/test_durable_ts04.py b/test/suite/test_durable_ts04.py index 666dfa90f13..b13efc47843 100644 --- a/test/suite/test_durable_ts04.py +++ b/test/suite/test_durable_ts04.py @@ -70,6 +70,7 @@ # - a timestamped commit will fail if its durable timestamp is before the durable # timestamp of something it read, and won't if its durable timestamp is the same # or later. +# - note however that this only actually fails if the operation triggers the restriction. # # Note that we filter out the case where both the first transaction and the second # transaction do removes (including truncates) because this doesn't work -- as of this @@ -124,9 +125,9 @@ class test_durable_ts04(wttest.WiredTigerTestCase): ('read_update', dict(second_op='update', second_read=True, op_xfail=True)), ('insert', dict(second_op='insert', second_read=False, op_xfail=False)), ('read_insert', dict(second_op='insert', second_read=True, op_xfail=True)), - # For the moment at least, truncate is a read-modify-write and will fail here even - # without an explicit read. - ('truncate', dict(second_op='truncate', second_read=False, op_xfail=True)), + # While truncate is a read-modify-write, it does not fail here without a + # read because we operate on disjoint chunks of the database. + ('truncate', dict(second_op='truncate', second_read=False, op_xfail=False)), ('read_truncate', dict(second_op='truncate', second_read=True, op_xfail=True)), ] second_commit_values = [ @@ -247,13 +248,13 @@ def test_durable_ts04(self): if self.second_op == 'truncate': lo = self.session.open_cursor(uri) hi = self.session.open_cursor(uri) - lo.set_key(1) - hi.set_key(nrows // 2) + lo.set_key(nrows // 2 + 1) + hi.set_key(nrows) self.assertEqual(self.session.truncate(None, lo, hi, None), 0) lo.close() hi.close() else: - for k in range(1, nrows // 2): + for k in range(nrows // 2 + 1, nrows): cursor.set_key(k) if self.second_op == 'remove': self.assertEqual(cursor.remove(), 0) From 55d2ff08b4b659a1f0bed2a38d03a6cbdda3c189 Mon Sep 17 00:00:00 2001 From: "David A. Holland" Date: Fri, 25 Feb 2022 19:34:46 -0500 Subject: [PATCH 20/20] Fix up merge botches. --- src/docs/timestamp-prepare.dox | 30 +++++++++++++----------------- test/suite/test_durable_ts03.py | 1 - 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/docs/timestamp-prepare.dox b/src/docs/timestamp-prepare.dox index fc836147584..0e2649720b6 100644 --- a/src/docs/timestamp-prepare.dox +++ b/src/docs/timestamp-prepare.dox @@ -48,23 +48,19 @@ consensus on durability. Applications without similar requirements for prepared transactions should set the durable and commit timestamps to the same time. When a transaction has a durable timestamp later than its commit timestamp, -reading its writes in a second transaction and then committing such that the -second transaction becomes durable before the first can produce data -inconsistency. Such commits are prohibited. -Applications that create gaps between their commit timestamps and -durable timestamps are responsible for either not reading in those -gaps, or coordinating the durable timestamps of their commits to -prevent this scenario. - -\warning It is also possible to create data inconsistency by reading such a -transaction's writes after its durable timestamp (including by -reading with no read timestamp) and committing without a -timestamp, if the global stable timestamp has not yet reached the durable -timestamp. (This can involve either reading with a read timestamp after the -durable timestamp, or reading with no read timestamp, as the latter reads the -latest committed values.) Applications using the delayed durable timestamp -feature should be cautious about committing with no write timestamp to ensure -that conflicts of this type cannot arise. +reading its writes in a second transaction and then committing other writes such +that the second transaction becomes durable before the first can produce data +inconsistency. +In this scenario the second transaction depends on the first; thus it must be +rolled back if the first transaction is rolled back; thus it must not become +durable before the first transaction. +Applications that create gaps between their commit timestamps and durable +timestamps are responsible for either not reading in those gaps, or establishing +an ordering for the durable timestamps of their commits to make sure that +this scenario cannot occur. +(Note that for the purposes of this issue the commit timestamp of a non-prepared +transaction is also its durable timestamp, and committing with no timestamp is +roughly comparable to committing at the current stable timestamp.) Transactions that violate this restriction will fail at commit time. diff --git a/test/suite/test_durable_ts03.py b/test/suite/test_durable_ts03.py index 6fea0ac2e73..f78f85a187e 100755 --- a/test/suite/test_durable_ts03.py +++ b/test/suite/test_durable_ts03.py @@ -27,7 +27,6 @@ # OTHER DEALINGS IN THE SOFTWARE. import wttest -import wiredtiger from wtscenario import make_scenarios # test_durable_ts03.py