diff --git a/dist/api_data.py b/dist/api_data.py index 51f4d159446..88888703a33 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -834,8 +834,8 @@ def __ge__(self, other): 'failpoint_eviction_fail_after_reconciliation', 'failpoint_history_store_delete_key_from_ts', 'history_store_checkpoint_delay', 'history_store_search', 'history_store_sweep_race', 'prepare_checkpoint_delay', - 'sleep_before_read_overflow_onpage', 'split_1', 'split_2', 'split_3', 'split_4', 'split_5', - 'split_6', 'split_7', 'split_8', 'tiered_flush_finish']), + 'prepare_resolution','sleep_before_read_overflow_onpage', 'split_1', 'split_2', 'split_3', + 'split_4', 'split_5','split_6', 'split_7', 'split_8', 'tiered_flush_finish']), Config('verbose', '[]', r''' enable messages for various subsystems and operations. Options are given as a list, where each message type can optionally define an associated verbosity level, such as diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 166db18a70f..c38c8733a69 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -628,33 +628,41 @@ __wt_btcur_search_prepared(WT_CURSOR *cursor, WT_UPDATE **updp) { WT_BTREE *btree; WT_CURSOR_BTREE *cbt; + WT_DECL_RET; WT_UPDATE *upd; - *updp = NULL; - + *updp = upd = NULL; /* -Wuninitialized */ cbt = (WT_CURSOR_BTREE *)cursor; btree = CUR2BT(cbt); - upd = NULL; /* -Wuninitialized */ /* - * Not calling the cursor initialization functions, we don't want to be tapped for eviction nor - * do we want other standard cursor semantics like snapshots, just discard the hazard pointer - * from the last operation. This also depends on the fact we're not setting the cursor's active - * flag, this is really a special chunk of code and not to be modified without careful thought. + * Set the key only flag to indicate to the search that we don't want to check visibility; we + * just want to position on a key. This short circuits validity checking. 
*/ - WT_RET(__cursor_reset(cbt)); - - WT_RET(btree->type == BTREE_ROW ? __cursor_row_search(cbt, false, NULL, NULL) : - __cursor_col_search(cbt, NULL, NULL)); - + F_SET(&cbt->iface, WT_CURSTD_KEY_ONLY); /* - * Ideally an exact match will be found, as this transaction is searching for updates done by - * itself. But, we cannot be sure of finding one, as pre-processing of the updates could have - * happened as part of resolving earlier transaction operations. + * The search logic searches the pinned page first, which would be the previously resolved + * update chain's page. If that doesn't find the key we want, it searches from the root. */ - if (cbt->compare != 0) - return (0); - + ret = __wt_btcur_search(cbt); + F_CLR(&cbt->iface, WT_CURSTD_KEY_ONLY); + /* + * The following assertion relies on the fact that for every prepared update there must be an + * associated key. However this is only true if we pin the page to prevent eviction. By calling + * into the standard search function we avoid releasing our hazard pointer between update chain + * resolutions. It also depends on sorting the transaction modifications by key; if we didn't do + * that we would unpin the page between searches and later come back to the same key. We rely on + * resolving all updates for a single key in sequence. + * + * This is a complex scenario: suppose we have two updates to the same key by our transaction, + * and are resolving the prepared updates. The first pass resolves the update chain; now if we + * let eviction run it could evict the page and it will treat the update chain as a regular non + * prepared update chain. If we were rolling back the transaction the key may not exist after + * eviction; similarly if we wrote a globally visible tombstone. Thus our second attempt at + * resolution would fail as it wouldn't find a key. 
+ */ + WT_ASSERT_ALWAYS( + CUR2S(cursor), ret == 0, "A valid key must exist when resolving prepared updates."); /* Get any uncommitted update from the in-memory page. */ switch (btree->type) { case BTREE_ROW: diff --git a/src/config/config_def.c b/src/config/config_def.c index f4c0dbcc9d2..a2983235cbe 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -179,9 +179,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { "\"failpoint_history_store_delete_key_from_ts\"," "\"history_store_checkpoint_delay\",\"history_store_search\"," "\"history_store_sweep_race\",\"prepare_checkpoint_delay\"," - "\"sleep_before_read_overflow_onpage\",\"split_1\",\"split_2\"," - "\"split_3\",\"split_4\",\"split_5\",\"split_6\",\"split_7\"," - "\"split_8\",\"tiered_flush_finish\"]", + "\"prepare_resolution\",\"sleep_before_read_overflow_onpage\"," + "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," + "\"split_6\",\"split_7\",\"split_8\",\"tiered_flush_finish\"]", NULL, 0}, {"verbose", "list", NULL, "choices=[\"api\",\"backup\",\"block\",\"block_cache\"," @@ -944,9 +944,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { "\"failpoint_history_store_delete_key_from_ts\"," "\"history_store_checkpoint_delay\",\"history_store_search\"," "\"history_store_sweep_race\",\"prepare_checkpoint_delay\"," - "\"sleep_before_read_overflow_onpage\",\"split_1\",\"split_2\"," - "\"split_3\",\"split_4\",\"split_5\",\"split_6\",\"split_7\"," - "\"split_8\",\"tiered_flush_finish\"]", + "\"prepare_resolution\",\"sleep_before_read_overflow_onpage\"," + "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," + "\"split_6\",\"split_7\",\"split_8\",\"tiered_flush_finish\"]", NULL, 0}, {"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs, 2}, @@ -1038,9 +1038,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { "\"failpoint_history_store_delete_key_from_ts\"," 
"\"history_store_checkpoint_delay\",\"history_store_search\"," "\"history_store_sweep_race\",\"prepare_checkpoint_delay\"," - "\"sleep_before_read_overflow_onpage\",\"split_1\",\"split_2\"," - "\"split_3\",\"split_4\",\"split_5\",\"split_6\",\"split_7\"," - "\"split_8\",\"tiered_flush_finish\"]", + "\"prepare_resolution\",\"sleep_before_read_overflow_onpage\"," + "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," + "\"split_6\",\"split_7\",\"split_8\",\"tiered_flush_finish\"]", NULL, 0}, {"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs, 2}, @@ -1130,9 +1130,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { "\"failpoint_history_store_delete_key_from_ts\"," "\"history_store_checkpoint_delay\",\"history_store_search\"," "\"history_store_sweep_race\",\"prepare_checkpoint_delay\"," - "\"sleep_before_read_overflow_onpage\",\"split_1\",\"split_2\"," - "\"split_3\",\"split_4\",\"split_5\",\"split_6\",\"split_7\"," - "\"split_8\",\"tiered_flush_finish\"]", + "\"prepare_resolution\",\"sleep_before_read_overflow_onpage\"," + "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," + "\"split_6\",\"split_7\",\"split_8\",\"tiered_flush_finish\"]", NULL, 0}, {"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs, 2}, @@ -1220,9 +1220,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { "\"failpoint_history_store_delete_key_from_ts\"," "\"history_store_checkpoint_delay\",\"history_store_search\"," "\"history_store_sweep_race\",\"prepare_checkpoint_delay\"," - "\"sleep_before_read_overflow_onpage\",\"split_1\",\"split_2\"," - "\"split_3\",\"split_4\",\"split_5\",\"split_6\",\"split_7\"," - "\"split_8\",\"tiered_flush_finish\"]", + "\"prepare_resolution\",\"sleep_before_read_overflow_onpage\"," + "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\"," + "\"split_6\",\"split_7\",\"split_8\",\"tiered_flush_finish\"]", NULL, 0}, 
{"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs, 2}, diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index edfddfd6a8d..b02a75a3d11 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -2360,6 +2360,7 @@ __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[]) {"history_store_search", WT_TIMING_STRESS_HS_SEARCH}, {"history_store_sweep_race", WT_TIMING_STRESS_HS_SWEEP}, {"prepare_checkpoint_delay", WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY}, + {"prepare_resolution", WT_TIMING_STRESS_PREPARE_RESOLUTION}, {"sleep_before_read_overflow_onpage", WT_TIMING_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE}, {"split_1", WT_TIMING_STRESS_SPLIT_1}, {"split_2", WT_TIMING_STRESS_SPLIT_2}, {"split_3", WT_TIMING_STRESS_SPLIT_3}, {"split_4", WT_TIMING_STRESS_SPLIT_4}, diff --git a/src/include/connection.h b/src/include/connection.h index d6ce94540b3..9a0351c4295 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -619,30 +619,31 @@ struct __wt_connection_impl { * Variable with flags for which subsystems the diagnostic stress timing delays have been requested. 
*/ /* AUTOMATIC FLAG VALUE GENERATION START 0 */ -#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x000001u -#define WT_TIMING_STRESS_BACKUP_RENAME 0x000002u -#define WT_TIMING_STRESS_CHECKPOINT_EVICT_PAGE 0x000004u -#define WT_TIMING_STRESS_CHECKPOINT_HANDLE 0x000008u -#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x000010u -#define WT_TIMING_STRESS_CHECKPOINT_STOP 0x000020u -#define WT_TIMING_STRESS_COMPACT_SLOW 0x000040u -#define WT_TIMING_STRESS_EVICT_REPOSITION 0x000080u -#define WT_TIMING_STRESS_FAILPOINT_EVICTION_FAIL_AFTER_RECONCILIATION 0x000100u -#define WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS 0x000200u -#define WT_TIMING_STRESS_HS_CHECKPOINT_DELAY 0x000400u -#define WT_TIMING_STRESS_HS_SEARCH 0x000800u -#define WT_TIMING_STRESS_HS_SWEEP 0x001000u -#define WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY 0x002000u -#define WT_TIMING_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE 0x004000u -#define WT_TIMING_STRESS_SPLIT_1 0x008000u -#define WT_TIMING_STRESS_SPLIT_2 0x010000u -#define WT_TIMING_STRESS_SPLIT_3 0x020000u -#define WT_TIMING_STRESS_SPLIT_4 0x040000u -#define WT_TIMING_STRESS_SPLIT_5 0x080000u -#define WT_TIMING_STRESS_SPLIT_6 0x100000u -#define WT_TIMING_STRESS_SPLIT_7 0x200000u -#define WT_TIMING_STRESS_SPLIT_8 0x400000u -#define WT_TIMING_STRESS_TIERED_FLUSH_FINISH 0x800000u +#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x0000001u +#define WT_TIMING_STRESS_BACKUP_RENAME 0x0000002u +#define WT_TIMING_STRESS_CHECKPOINT_EVICT_PAGE 0x0000004u +#define WT_TIMING_STRESS_CHECKPOINT_HANDLE 0x0000008u +#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x0000010u +#define WT_TIMING_STRESS_CHECKPOINT_STOP 0x0000020u +#define WT_TIMING_STRESS_COMPACT_SLOW 0x0000040u +#define WT_TIMING_STRESS_EVICT_REPOSITION 0x0000080u +#define WT_TIMING_STRESS_FAILPOINT_EVICTION_FAIL_AFTER_RECONCILIATION 0x0000100u +#define WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS 0x0000200u +#define WT_TIMING_STRESS_HS_CHECKPOINT_DELAY 0x0000400u +#define WT_TIMING_STRESS_HS_SEARCH 
0x0000800u +#define WT_TIMING_STRESS_HS_SWEEP 0x0001000u +#define WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY 0x0002000u +#define WT_TIMING_STRESS_PREPARE_RESOLUTION 0x0004000u +#define WT_TIMING_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE 0x0008000u +#define WT_TIMING_STRESS_SPLIT_1 0x0010000u +#define WT_TIMING_STRESS_SPLIT_2 0x0020000u +#define WT_TIMING_STRESS_SPLIT_3 0x0040000u +#define WT_TIMING_STRESS_SPLIT_4 0x0080000u +#define WT_TIMING_STRESS_SPLIT_5 0x0100000u +#define WT_TIMING_STRESS_SPLIT_6 0x0200000u +#define WT_TIMING_STRESS_SPLIT_7 0x0400000u +#define WT_TIMING_STRESS_SPLIT_8 0x0800000u +#define WT_TIMING_STRESS_TIERED_FLUSH_FINISH 0x1000000u /* AUTOMATIC FLAG VALUE GENERATION STOP 32 */ uint32_t timing_stress_flags; diff --git a/src/txn/txn.c b/src/txn/txn.c index 845895ccd06..f28c9b8360c 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -1663,6 +1663,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) */ if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED)) WT_ERR(__txn_resolve_prepared_op(session, op, true, &cursor)); + + /* Sleep between resolving prepared operations when configured. 
*/ + __wt_timing_stress(session, WT_TIMING_STRESS_PREPARE_RESOLUTION, NULL); #ifdef HAVE_DIAGNOSTIC ++prepare_count; #endif diff --git a/test/format/config.h b/test/format/config.h index 57d6099037e..3e5a2cc2b1a 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -142,21 +142,22 @@ typedef struct { #define V_GLOBAL_STRESS_HS_CHECKPOINT_DELAY 110 #define V_GLOBAL_STRESS_HS_SEARCH 111 #define V_GLOBAL_STRESS_HS_SWEEP 112 -#define V_GLOBAL_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE 113 -#define V_GLOBAL_STRESS_SPLIT_1 114 -#define V_GLOBAL_STRESS_SPLIT_2 115 -#define V_GLOBAL_STRESS_SPLIT_3 116 -#define V_GLOBAL_STRESS_SPLIT_4 117 -#define V_GLOBAL_STRESS_SPLIT_5 118 -#define V_GLOBAL_STRESS_SPLIT_6 119 -#define V_GLOBAL_STRESS_SPLIT_7 120 -#define V_GLOBAL_STRESS_SPLIT_8 121 -#define V_GLOBAL_TIERED_STORAGE_FLUSH_FREQUENCY 122 -#define V_GLOBAL_TIERED_STORAGE_STORAGE_SOURCE 123 -#define V_GLOBAL_TRANSACTION_IMPLICIT 124 -#define V_GLOBAL_TRANSACTION_TIMESTAMPS 125 -#define V_GLOBAL_WIREDTIGER_CONFIG 126 -#define V_GLOBAL_WIREDTIGER_RWLOCK 127 -#define V_GLOBAL_WIREDTIGER_LEAK_MEMORY 128 +#define V_GLOBAL_STRESS_PREPARE_RESOLUTION 113 +#define V_GLOBAL_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE 114 +#define V_GLOBAL_STRESS_SPLIT_1 115 +#define V_GLOBAL_STRESS_SPLIT_2 116 +#define V_GLOBAL_STRESS_SPLIT_3 117 +#define V_GLOBAL_STRESS_SPLIT_4 118 +#define V_GLOBAL_STRESS_SPLIT_5 119 +#define V_GLOBAL_STRESS_SPLIT_6 120 +#define V_GLOBAL_STRESS_SPLIT_7 121 +#define V_GLOBAL_STRESS_SPLIT_8 122 +#define V_GLOBAL_TIERED_STORAGE_FLUSH_FREQUENCY 123 +#define V_GLOBAL_TIERED_STORAGE_STORAGE_SOURCE 124 +#define V_GLOBAL_TRANSACTION_IMPLICIT 125 +#define V_GLOBAL_TRANSACTION_TIMESTAMPS 126 +#define V_GLOBAL_WIREDTIGER_CONFIG 127 +#define V_GLOBAL_WIREDTIGER_RWLOCK 128 +#define V_GLOBAL_WIREDTIGER_LEAK_MEMORY 129 -#define V_ELEMENT_COUNT 129 +#define V_ELEMENT_COUNT 130 diff --git a/test/format/config.sh b/test/format/config.sh index 11d5587bdcf..5694929cec7 100755 --- 
a/test/format/config.sh +++ b/test/format/config.sh @@ -303,6 +303,8 @@ CONFIG configuration_list[] = { {"stress.hs_sweep", "stress history store sweep", C_BOOL, 2, 0, 0} +{"stress.prepare_resolution", "stress prepare resolution", C_BOOL, 2, 0, 0} + {"stress.sleep_before_read_overflow_onpage", "stress onpage overflow read race with checkpoint", C_BOOL, 2, 0, 0} {"stress.split_1", "stress splits (#1)", C_BOOL, 2, 0, 0} diff --git a/test/format/format_config_def.c b/test/format/format_config_def.c index e57afdee457..9d53540c2c5 100644 --- a/test/format/format_config_def.c +++ b/test/format/format_config_def.c @@ -328,6 +328,9 @@ CONFIG configuration_list[] = {{"assert.read_timestamp", "assert read_timestamp" {"stress.hs_sweep", "stress history store sweep", C_BOOL, 2, 0, 0, V_GLOBAL_STRESS_HS_SWEEP}, + {"stress.prepare_resolution", "stress prepare resolution", C_BOOL, 2, 0, 0, + V_GLOBAL_STRESS_PREPARE_RESOLUTION}, + {"stress.sleep_before_read_overflow_onpage", "stress onpage overflow read race with checkpoint", C_BOOL, 2, 0, 0, V_GLOBAL_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE}, diff --git a/test/format/wts.c b/test/format/wts.c index 038b4d669d7..8121e890a5a 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -177,6 +177,8 @@ configure_timing_stress(char **p, size_t max) CONFIG_APPEND(*p, ",history_store_search"); if (GV(STRESS_HS_SWEEP)) CONFIG_APPEND(*p, ",history_store_sweep_race"); + if (GV(STRESS_PREPARE_RESOLUTION)) + CONFIG_APPEND(*p, ",prepare_resolution"); if (GV(STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE)) CONFIG_APPEND(*p, ",sleep_before_read_overflow_onpage"); if (GV(STRESS_SPLIT_1))