Skip to content

Commit

Permalink
WT-10873 Pin the page we found during prepared update resolution. (#9114
Browse files Browse the repository at this point in the history
)
  • Loading branch information
luke-pearson committed May 2, 2023
1 parent e9602ac commit 7d22e18
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 76 deletions.
4 changes: 2 additions & 2 deletions dist/api_data.py
Expand Up @@ -834,8 +834,8 @@ def __ge__(self, other):
'failpoint_eviction_fail_after_reconciliation',
'failpoint_history_store_delete_key_from_ts', 'history_store_checkpoint_delay',
'history_store_search', 'history_store_sweep_race', 'prepare_checkpoint_delay',
'sleep_before_read_overflow_onpage', 'split_1', 'split_2', 'split_3', 'split_4', 'split_5',
'split_6', 'split_7', 'split_8', 'tiered_flush_finish']),
'prepare_resolution','sleep_before_read_overflow_onpage', 'split_1', 'split_2', 'split_3',
'split_4', 'split_5','split_6', 'split_7', 'split_8', 'tiered_flush_finish']),
Config('verbose', '[]', r'''
enable messages for various subsystems and operations. Options are given as a list,
where each message type can optionally define an associated verbosity level, such as
Expand Down
44 changes: 26 additions & 18 deletions src/btree/bt_cursor.c
Expand Up @@ -628,33 +628,41 @@ __wt_btcur_search_prepared(WT_CURSOR *cursor, WT_UPDATE **updp)
{
WT_BTREE *btree;
WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
WT_UPDATE *upd;

*updp = NULL;

*updp = upd = NULL; /* -Wuninitialized */
cbt = (WT_CURSOR_BTREE *)cursor;
btree = CUR2BT(cbt);
upd = NULL; /* -Wuninitialized */

/*
* Not calling the cursor initialization functions, we don't want to be tapped for eviction nor
* do we want other standard cursor semantics like snapshots, just discard the hazard pointer
* from the last operation. This also depends on the fact we're not setting the cursor's active
* flag, this is really a special chunk of code and not to be modified without careful thought.
* Set the key-only flag to indicate to the search that we don't want to check visibility; we
* just want to position on a key. This short-circuits validity checking.
*/
WT_RET(__cursor_reset(cbt));

WT_RET(btree->type == BTREE_ROW ? __cursor_row_search(cbt, false, NULL, NULL) :
__cursor_col_search(cbt, NULL, NULL));

F_SET(&cbt->iface, WT_CURSTD_KEY_ONLY);
/*
* Ideally an exact match will be found, as this transaction is searching for updates done by
* itself. But, we cannot be sure of finding one, as pre-processing of the updates could have
* happened as part of resolving earlier transaction operations.
* The search logic searches the pinned page first, which would be the previously resolved
* update chain's page. If that doesn't find the key we want, it searches from the root.
*/
if (cbt->compare != 0)
return (0);

ret = __wt_btcur_search(cbt);
F_CLR(&cbt->iface, WT_CURSTD_KEY_ONLY);
/*
* The following assertion relies on the fact that for every prepared update there must be an
* associated key. However, this is only true if we pin the page to prevent eviction. By calling
* into the standard search function we avoid releasing our hazard pointer between update chain
* resolutions. It also depends on sorting the transaction modifications by key; if we didn't do
* that, we would unpin the page between searches and later come back to the same key. We rely on
* resolving all updates for a single key in sequence.
*
* This is a complex scenario: suppose we have two updates to the same key by our transaction
* and are resolving the prepared updates. The first pass resolves the update chain; if we then
* let eviction run, it could evict the page and treat the update chain as a regular
* non-prepared update chain. If we were rolling back the transaction, the key may not exist after
* eviction; similarly if we wrote a globally visible tombstone. Thus our second attempt at
* resolution would fail as it wouldn't find a key.
*/
WT_ASSERT_ALWAYS(
CUR2S(cursor), ret == 0, "A valid key must exist when resolving prepared updates.");
/* Get any uncommitted update from the in-memory page. */
switch (btree->type) {
case BTREE_ROW:
Expand Down
30 changes: 15 additions & 15 deletions src/config/config_def.c
Expand Up @@ -179,9 +179,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
"\"failpoint_history_store_delete_key_from_ts\","
"\"history_store_checkpoint_delay\",\"history_store_search\","
"\"history_store_sweep_race\",\"prepare_checkpoint_delay\","
"\"sleep_before_read_overflow_onpage\",\"split_1\",\"split_2\","
"\"split_3\",\"split_4\",\"split_5\",\"split_6\",\"split_7\","
"\"split_8\",\"tiered_flush_finish\"]",
"\"prepare_resolution\",\"sleep_before_read_overflow_onpage\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
"\"split_6\",\"split_7\",\"split_8\",\"tiered_flush_finish\"]",
NULL, 0},
{"verbose", "list", NULL,
"choices=[\"api\",\"backup\",\"block\",\"block_cache\","
Expand Down Expand Up @@ -944,9 +944,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
"\"failpoint_history_store_delete_key_from_ts\","
"\"history_store_checkpoint_delay\",\"history_store_search\","
"\"history_store_sweep_race\",\"prepare_checkpoint_delay\","
"\"sleep_before_read_overflow_onpage\",\"split_1\",\"split_2\","
"\"split_3\",\"split_4\",\"split_5\",\"split_6\",\"split_7\","
"\"split_8\",\"tiered_flush_finish\"]",
"\"prepare_resolution\",\"sleep_before_read_overflow_onpage\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
"\"split_6\",\"split_7\",\"split_8\",\"tiered_flush_finish\"]",
NULL, 0},
{"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
2},
Expand Down Expand Up @@ -1038,9 +1038,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
"\"failpoint_history_store_delete_key_from_ts\","
"\"history_store_checkpoint_delay\",\"history_store_search\","
"\"history_store_sweep_race\",\"prepare_checkpoint_delay\","
"\"sleep_before_read_overflow_onpage\",\"split_1\",\"split_2\","
"\"split_3\",\"split_4\",\"split_5\",\"split_6\",\"split_7\","
"\"split_8\",\"tiered_flush_finish\"]",
"\"prepare_resolution\",\"sleep_before_read_overflow_onpage\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
"\"split_6\",\"split_7\",\"split_8\",\"tiered_flush_finish\"]",
NULL, 0},
{"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
2},
Expand Down Expand Up @@ -1130,9 +1130,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
"\"failpoint_history_store_delete_key_from_ts\","
"\"history_store_checkpoint_delay\",\"history_store_search\","
"\"history_store_sweep_race\",\"prepare_checkpoint_delay\","
"\"sleep_before_read_overflow_onpage\",\"split_1\",\"split_2\","
"\"split_3\",\"split_4\",\"split_5\",\"split_6\",\"split_7\","
"\"split_8\",\"tiered_flush_finish\"]",
"\"prepare_resolution\",\"sleep_before_read_overflow_onpage\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
"\"split_6\",\"split_7\",\"split_8\",\"tiered_flush_finish\"]",
NULL, 0},
{"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
2},
Expand Down Expand Up @@ -1220,9 +1220,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
"\"failpoint_history_store_delete_key_from_ts\","
"\"history_store_checkpoint_delay\",\"history_store_search\","
"\"history_store_sweep_race\",\"prepare_checkpoint_delay\","
"\"sleep_before_read_overflow_onpage\",\"split_1\",\"split_2\","
"\"split_3\",\"split_4\",\"split_5\",\"split_6\",\"split_7\","
"\"split_8\",\"tiered_flush_finish\"]",
"\"prepare_resolution\",\"sleep_before_read_overflow_onpage\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
"\"split_6\",\"split_7\",\"split_8\",\"tiered_flush_finish\"]",
NULL, 0},
{"transaction_sync", "category", NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs,
2},
Expand Down
1 change: 1 addition & 0 deletions src/conn/conn_api.c
Expand Up @@ -2360,6 +2360,7 @@ __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[])
{"history_store_search", WT_TIMING_STRESS_HS_SEARCH},
{"history_store_sweep_race", WT_TIMING_STRESS_HS_SWEEP},
{"prepare_checkpoint_delay", WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY},
{"prepare_resolution", WT_TIMING_STRESS_PREPARE_RESOLUTION},
{"sleep_before_read_overflow_onpage", WT_TIMING_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE},
{"split_1", WT_TIMING_STRESS_SPLIT_1}, {"split_2", WT_TIMING_STRESS_SPLIT_2},
{"split_3", WT_TIMING_STRESS_SPLIT_3}, {"split_4", WT_TIMING_STRESS_SPLIT_4},
Expand Down
49 changes: 25 additions & 24 deletions src/include/connection.h
Expand Up @@ -619,30 +619,31 @@ struct __wt_connection_impl {
* Variable with flags for which subsystems the diagnostic stress timing delays have been requested.
*/
/* AUTOMATIC FLAG VALUE GENERATION START 0 */
#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x000001u
#define WT_TIMING_STRESS_BACKUP_RENAME 0x000002u
#define WT_TIMING_STRESS_CHECKPOINT_EVICT_PAGE 0x000004u
#define WT_TIMING_STRESS_CHECKPOINT_HANDLE 0x000008u
#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x000010u
#define WT_TIMING_STRESS_CHECKPOINT_STOP 0x000020u
#define WT_TIMING_STRESS_COMPACT_SLOW 0x000040u
#define WT_TIMING_STRESS_EVICT_REPOSITION 0x000080u
#define WT_TIMING_STRESS_FAILPOINT_EVICTION_FAIL_AFTER_RECONCILIATION 0x000100u
#define WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS 0x000200u
#define WT_TIMING_STRESS_HS_CHECKPOINT_DELAY 0x000400u
#define WT_TIMING_STRESS_HS_SEARCH 0x000800u
#define WT_TIMING_STRESS_HS_SWEEP 0x001000u
#define WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY 0x002000u
#define WT_TIMING_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE 0x004000u
#define WT_TIMING_STRESS_SPLIT_1 0x008000u
#define WT_TIMING_STRESS_SPLIT_2 0x010000u
#define WT_TIMING_STRESS_SPLIT_3 0x020000u
#define WT_TIMING_STRESS_SPLIT_4 0x040000u
#define WT_TIMING_STRESS_SPLIT_5 0x080000u
#define WT_TIMING_STRESS_SPLIT_6 0x100000u
#define WT_TIMING_STRESS_SPLIT_7 0x200000u
#define WT_TIMING_STRESS_SPLIT_8 0x400000u
#define WT_TIMING_STRESS_TIERED_FLUSH_FINISH 0x800000u
#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x0000001u
#define WT_TIMING_STRESS_BACKUP_RENAME 0x0000002u
#define WT_TIMING_STRESS_CHECKPOINT_EVICT_PAGE 0x0000004u
#define WT_TIMING_STRESS_CHECKPOINT_HANDLE 0x0000008u
#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x0000010u
#define WT_TIMING_STRESS_CHECKPOINT_STOP 0x0000020u
#define WT_TIMING_STRESS_COMPACT_SLOW 0x0000040u
#define WT_TIMING_STRESS_EVICT_REPOSITION 0x0000080u
#define WT_TIMING_STRESS_FAILPOINT_EVICTION_FAIL_AFTER_RECONCILIATION 0x0000100u
#define WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS 0x0000200u
#define WT_TIMING_STRESS_HS_CHECKPOINT_DELAY 0x0000400u
#define WT_TIMING_STRESS_HS_SEARCH 0x0000800u
#define WT_TIMING_STRESS_HS_SWEEP 0x0001000u
#define WT_TIMING_STRESS_PREPARE_CHECKPOINT_DELAY 0x0002000u
#define WT_TIMING_STRESS_PREPARE_RESOLUTION 0x0004000u
#define WT_TIMING_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE 0x0008000u
#define WT_TIMING_STRESS_SPLIT_1 0x0010000u
#define WT_TIMING_STRESS_SPLIT_2 0x0020000u
#define WT_TIMING_STRESS_SPLIT_3 0x0040000u
#define WT_TIMING_STRESS_SPLIT_4 0x0080000u
#define WT_TIMING_STRESS_SPLIT_5 0x0100000u
#define WT_TIMING_STRESS_SPLIT_6 0x0200000u
#define WT_TIMING_STRESS_SPLIT_7 0x0400000u
#define WT_TIMING_STRESS_SPLIT_8 0x0800000u
#define WT_TIMING_STRESS_TIERED_FLUSH_FINISH 0x1000000u
/* AUTOMATIC FLAG VALUE GENERATION STOP 32 */
uint32_t timing_stress_flags;

Expand Down
3 changes: 3 additions & 0 deletions src/txn/txn.c
Expand Up @@ -1663,6 +1663,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
*/
if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED))
WT_ERR(__txn_resolve_prepared_op(session, op, true, &cursor));

/* Sleep between resolving prepared operations when configured. */
__wt_timing_stress(session, WT_TIMING_STRESS_PREPARE_RESOLUTION, NULL);
#ifdef HAVE_DIAGNOSTIC
++prepare_count;
#endif
Expand Down
35 changes: 18 additions & 17 deletions test/format/config.h
Expand Up @@ -142,21 +142,22 @@ typedef struct {
#define V_GLOBAL_STRESS_HS_CHECKPOINT_DELAY 110
#define V_GLOBAL_STRESS_HS_SEARCH 111
#define V_GLOBAL_STRESS_HS_SWEEP 112
#define V_GLOBAL_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE 113
#define V_GLOBAL_STRESS_SPLIT_1 114
#define V_GLOBAL_STRESS_SPLIT_2 115
#define V_GLOBAL_STRESS_SPLIT_3 116
#define V_GLOBAL_STRESS_SPLIT_4 117
#define V_GLOBAL_STRESS_SPLIT_5 118
#define V_GLOBAL_STRESS_SPLIT_6 119
#define V_GLOBAL_STRESS_SPLIT_7 120
#define V_GLOBAL_STRESS_SPLIT_8 121
#define V_GLOBAL_TIERED_STORAGE_FLUSH_FREQUENCY 122
#define V_GLOBAL_TIERED_STORAGE_STORAGE_SOURCE 123
#define V_GLOBAL_TRANSACTION_IMPLICIT 124
#define V_GLOBAL_TRANSACTION_TIMESTAMPS 125
#define V_GLOBAL_WIREDTIGER_CONFIG 126
#define V_GLOBAL_WIREDTIGER_RWLOCK 127
#define V_GLOBAL_WIREDTIGER_LEAK_MEMORY 128
#define V_GLOBAL_STRESS_PREPARE_RESOLUTION 113
#define V_GLOBAL_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE 114
#define V_GLOBAL_STRESS_SPLIT_1 115
#define V_GLOBAL_STRESS_SPLIT_2 116
#define V_GLOBAL_STRESS_SPLIT_3 117
#define V_GLOBAL_STRESS_SPLIT_4 118
#define V_GLOBAL_STRESS_SPLIT_5 119
#define V_GLOBAL_STRESS_SPLIT_6 120
#define V_GLOBAL_STRESS_SPLIT_7 121
#define V_GLOBAL_STRESS_SPLIT_8 122
#define V_GLOBAL_TIERED_STORAGE_FLUSH_FREQUENCY 123
#define V_GLOBAL_TIERED_STORAGE_STORAGE_SOURCE 124
#define V_GLOBAL_TRANSACTION_IMPLICIT 125
#define V_GLOBAL_TRANSACTION_TIMESTAMPS 126
#define V_GLOBAL_WIREDTIGER_CONFIG 127
#define V_GLOBAL_WIREDTIGER_RWLOCK 128
#define V_GLOBAL_WIREDTIGER_LEAK_MEMORY 129

#define V_ELEMENT_COUNT 129
#define V_ELEMENT_COUNT 130
2 changes: 2 additions & 0 deletions test/format/config.sh
Expand Up @@ -303,6 +303,8 @@ CONFIG configuration_list[] = {
{"stress.hs_sweep", "stress history store sweep", C_BOOL, 2, 0, 0}
{"stress.prepare_resolution", "stress prepare resolution", C_BOOL, 2, 0, 0}
{"stress.sleep_before_read_overflow_onpage", "stress onpage overflow read race with checkpoint", C_BOOL, 2, 0, 0}
{"stress.split_1", "stress splits (#1)", C_BOOL, 2, 0, 0}
Expand Down
3 changes: 3 additions & 0 deletions test/format/format_config_def.c
Expand Up @@ -328,6 +328,9 @@ CONFIG configuration_list[] = {{"assert.read_timestamp", "assert read_timestamp"

{"stress.hs_sweep", "stress history store sweep", C_BOOL, 2, 0, 0, V_GLOBAL_STRESS_HS_SWEEP},

{"stress.prepare_resolution", "stress prepare resolution", C_BOOL, 2, 0, 0,
V_GLOBAL_STRESS_PREPARE_RESOLUTION},

{"stress.sleep_before_read_overflow_onpage", "stress onpage overflow read race with checkpoint",
C_BOOL, 2, 0, 0, V_GLOBAL_STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE},

Expand Down
2 changes: 2 additions & 0 deletions test/format/wts.c
Expand Up @@ -177,6 +177,8 @@ configure_timing_stress(char **p, size_t max)
CONFIG_APPEND(*p, ",history_store_search");
if (GV(STRESS_HS_SWEEP))
CONFIG_APPEND(*p, ",history_store_sweep_race");
if (GV(STRESS_PREPARE_RESOLUTION))
CONFIG_APPEND(*p, ",prepare_resolution");
if (GV(STRESS_SLEEP_BEFORE_READ_OVERFLOW_ONPAGE))
CONFIG_APPEND(*p, ",sleep_before_read_overflow_onpage");
if (GV(STRESS_SPLIT_1))
Expand Down

0 comments on commit 7d22e18

Please sign in to comment.