Skip to content

Commit

Permalink
Merge pull request #2456 from wiredtiger/WT-2346
Browse files Browse the repository at this point in the history
WT-2346 Don't hold the schema lock during checkpoint I/O.
  • Loading branch information
michaelcahill committed Feb 12, 2016
2 parents 3bc10f5 + c20caba commit 8b996c0
Show file tree
Hide file tree
Showing 17 changed files with 142 additions and 235 deletions.
1 change: 1 addition & 0 deletions dist/flags.py
Expand Up @@ -114,6 +114,7 @@
'SESSION_LOCK_NO_WAIT',
'SESSION_LOCKED_CHECKPOINT',
'SESSION_LOCKED_HANDLE_LIST',
'SESSION_LOCKED_METADATA',
'SESSION_LOCKED_SCHEMA',
'SESSION_LOCKED_SLOT',
'SESSION_LOCKED_TABLE',
Expand Down
1 change: 1 addition & 0 deletions dist/s_string.ok
Expand Up @@ -189,6 +189,7 @@ MALLOC
MEM
MEMALIGN
MERCHANTABILITY
METADATA
MONGODB
MSVC
MULTIBLOCK
Expand Down
115 changes: 10 additions & 105 deletions src/conn/conn_dhandle.c
Expand Up @@ -368,24 +368,21 @@ __conn_btree_apply_internal(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle,
* sure it's referenced to stop other internal code dropping the handle
* (e.g., in LSM when cleaning up obsolete chunks).
*/
ret = __wt_session_get_btree(session,
dhandle->name, dhandle->checkpoint, NULL, 0);
if (ret == 0) {
WT_SAVE_DHANDLE(session,
ret = func(session, cfg));
if (WT_META_TRACKING(session))
WT_TRET(__wt_meta_track_handle_lock(session, false));
else
WT_TRET(__wt_session_release_btree(session));
} else if (ret == EBUSY)
ret = __wt_conn_btree_apply_single(session, dhandle->name,
dhandle->checkpoint, func, cfg);
if ((ret = __wt_session_get_btree(session,
dhandle->name, dhandle->checkpoint, NULL, 0)) != 0)
return (ret == EBUSY ? 0 : ret);

WT_SAVE_DHANDLE(session, ret = func(session, cfg));
if (WT_META_TRACKING(session))
WT_TRET(__wt_meta_track_handle_lock(session, false));
else
WT_TRET(__wt_session_release_btree(session));
return (ret);
}

/*
* __wt_conn_btree_apply --
* Apply a function to all open btree handles apart from the metadata.
* Apply a function to all open btree handles with the given URI.
*/
int
__wt_conn_btree_apply(WT_SESSION_IMPL *session,
Expand Down Expand Up @@ -429,98 +426,6 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session,
return (0);
}

/*
 * __wt_conn_btree_apply_single_ckpt --
 *	Resolve the optional "checkpoint" configuration value into a
 *	checkpoint name, then hand off to __wt_conn_btree_apply_single.
 */
int
__wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session,
    const char *uri,
    int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
{
	WT_CONFIG_ITEM ckpt_cfg;
	WT_DECL_RET;
	const char *ckpt_name;

	/* No checkpoint name unless the configuration supplies one. */
	ckpt_name = NULL;

	/*
	 * This wrapper only exists to decode the checkpoint configuration:
	 * callers that never open a checkpoint call the underlying function
	 * directly.
	 */
	WT_RET_NOTFOUND_OK(
	    __wt_config_gets_def(session, cfg, "checkpoint", 0, &ckpt_cfg));

	if (ckpt_cfg.len != 0) {
		if (!WT_STRING_MATCH(
		    WT_CHECKPOINT, ckpt_cfg.str, ckpt_cfg.len)) {
			/* An explicit name: take a private copy of it. */
			WT_RET(__wt_strndup(
			    session, ckpt_cfg.str, ckpt_cfg.len, &ckpt_name));
		} else {
			/*
			 * The internal checkpoint name is special: look up
			 * the last unnamed checkpoint of the object instead.
			 */
			WT_RET(__wt_meta_checkpoint_last_name(
			    session, uri, &ckpt_name));
		}
	}

	ret = __wt_conn_btree_apply_single(
	    session, uri, ckpt_name, func, cfg);

	/* The name was allocated above (if at all); release it either way. */
	__wt_free(session, ckpt_name);

	return (ret);
}

/*
 * __wt_conn_btree_apply_single --
 *	Run a function against a single btree handle, identified by URI and
 *	checkpoint name, for the case where the handle couldn't be locked
 *	(attempting to get the handle returned EBUSY).
 */
int
__wt_conn_btree_apply_single(WT_SESSION_IMPL *session,
    const char *uri, const char *checkpoint,
    int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dh;
	WT_DECL_RET;
	uint64_t slot, uri_hash;

	WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));

	conn = S2C(session);
	uri_hash = __wt_hash_city64(uri, strlen(uri));
	slot = uri_hash % WT_HASH_ARRAY_SIZE;

	TAILQ_FOREACH(dh, &conn->dhhash[slot], hashq) {
		/* Only open, live handles are candidates. */
		if (!F_ISSET(dh, WT_DHANDLE_OPEN) ||
		    F_ISSET(dh, WT_DHANDLE_DEAD))
			continue;

		/* The name must match (hash first, then the string). */
		if (uri_hash != dh->name_hash || strcmp(uri, dh->name) != 0)
			continue;

		/*
		 * The checkpoint names must either both be absent, or both be
		 * present and equal.
		 */
		if (dh->checkpoint == NULL || checkpoint == NULL) {
			if (dh->checkpoint != checkpoint)
				continue;
		} else if (strcmp(dh->checkpoint, checkpoint) != 0)
			continue;

		/*
		 * We're holding the handle list lock which locks out handle
		 * open (which might change the state of the underlying
		 * object). However, closing a handle doesn't require the
		 * handle list lock: lock out closing the handle and then
		 * confirm the handle is still open before using it.
		 */
		__wt_spin_lock(session, &dh->close_lock);
		if (F_ISSET(dh, WT_DHANDLE_OPEN) &&
		    !F_ISSET(dh, WT_DHANDLE_DEAD)) {
			WT_WITH_DHANDLE(session, dh,
			    ret = func(session, cfg));
		}
		__wt_spin_unlock(session, &dh->close_lock);

		/* Stop at the first failure, after dropping the close lock. */
		WT_RET(ret);
	}

	return (0);
}

/*
* __wt_conn_dhandle_close_all --
* Close all data handles with matching name (including all
Expand Down
2 changes: 2 additions & 0 deletions src/conn/conn_handle.c
Expand Up @@ -56,6 +56,7 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
WT_RET(__wt_rwlock_alloc(session,
&conn->hot_backup_lock, "hot backup"));
WT_RET(__wt_spin_init(session, &conn->las_lock, "lookaside table"));
WT_RET(__wt_spin_init(session, &conn->metadata_lock, "metadata"));
WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
WT_RET(__wt_spin_init(session, &conn->schema_lock, "schema"));
WT_RET(__wt_spin_init(session, &conn->table_lock, "table creation"));
Expand Down Expand Up @@ -143,6 +144,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
__wt_spin_destroy(session, &conn->fh_lock);
WT_TRET(__wt_rwlock_destroy(session, &conn->hot_backup_lock));
__wt_spin_destroy(session, &conn->las_lock);
__wt_spin_destroy(session, &conn->metadata_lock);
__wt_spin_destroy(session, &conn->reconfig_lock);
__wt_spin_destroy(session, &conn->schema_lock);
__wt_spin_destroy(session, &conn->table_lock);
Expand Down
34 changes: 20 additions & 14 deletions src/docs/checkpoint.dox
Expand Up @@ -23,11 +23,16 @@ All transactional updates committed before a checkpoint are made durable
by the checkpoint, therefore the frequency of checkpoints limits the
volume of data that may be lost due to application or system failure.

When WiredTiger data sources are first opened, they are opened in the
state of the most recent checkpoint taken on the file, in other words,
updates after the most recent checkpoint will not appear in the data
source. If no checkpoint is found when the data source is opened, the
data source will appear empty.
Data sources that are involved in an exclusive operation when the
checkpoint starts, including bulk load, verify or salvage, will be skipped
by the checkpoint. Operations requiring exclusive access may fail with
an \c EBUSY error if attempted during a checkpoint.

When data sources are first opened, they are opened in the state of the
most recent checkpoint taken on the file, in other words, updates after the
most recent checkpoint will not appear in the data source. If no
checkpoint is found when the data source is opened, the data source will
appear empty.

@section checkpoint_server Automatic checkpoints

Expand All @@ -54,15 +59,16 @@ checkpoint cursor is closed.

@section checkpoint_naming Checkpoint naming

Additionally, checkpoints that do not include LSM trees may optionally
be given names by the application. Checkpoints named by the application
persist until explicitly discarded or the application creates a new
checkpoint with the same name (which replaces the previous checkpoint
of that name). If the previous checkpoint cannot be replaced, either
because a cursor is reading from the previous checkpoint, or backups are
in progress, the checkpoint will fail. Because named checkpoints
persist until discarded or replaced, they can be used to periodically
snapshot data for later use.
Additionally, checkpoints that do not include LSM trees may optionally be
given names by the application. Because named checkpoints persist until
discarded or replaced, they can be used to periodically snapshot data for
later use.

Checkpoints named by the application persist until explicitly discarded or
the application creates a new checkpoint with the same name (which replaces
the previous checkpoint of that name). If the previous checkpoint cannot be
replaced, either because a cursor is reading from the previous checkpoint,
or backups are in progress, the checkpoint will fail.

Internal checkpoints (that is, checkpoints not named by the application)
use the reserved name "WiredTigerCheckpoint". Applications can open the
Expand Down
35 changes: 21 additions & 14 deletions src/docs/upgrading.dox
Expand Up @@ -4,25 +4,24 @@
<dl>
<dt>Column-store bulk-load cursors</dt>
<dd>
Historically, bulk-load of a column-store object ignored any key set in
the cursor and automatically assigned each inserted row the next
sequential record number for its key. In the 2.7.1 release, column-store
objects match row-store behavior and require the cursor key be set
before an insert. (This also allows sparse tables to be created
in column-store objects; any skipped records are created as
already-deleted rows.) To match the previous behavior, specify the
\c append configuration string when opening the column-store bulk-load
cursor; this causes the cursor's key to be ignored and each inserted row
will be assigned the next record number.
Historically, bulk-load of a column-store object ignored any key set in the
cursor and automatically assigned each inserted row the next sequential
record number for its key. In the 2.7.1 release, column-store objects match
row-store behavior and require the cursor key be set before an insert.
(This allows sparse tables to be created in column-store objects; any
skipped records are created as already-deleted rows.) To match the previous
behavior, specify the \c append configuration string when opening the
column-store bulk-load cursor; this causes the cursor's key to be ignored
and each inserted row will be assigned the next record number.
</dd>

<dt>Change to WT_SESSION::truncate with URI</dt>
<dd>
If using the WT_SESSION::truncate API with a file: URI for a full table
truncate, underlying algorithmic changes result in some visible differences.
This call can now return WT_ROLLBACK. Applications should be prepared to
handle this error. This method no longer requires exclusive access to the
table. Also the underlying disk space may not be immediately
truncate, underlying algorithmic changes result in some visible
differences. This call can now return WT_ROLLBACK. Applications should be
prepared to handle this error. This method no longer requires exclusive
access to the table. Also the underlying disk space may not be immediately
reclaimed when the call returns. The performance of this API may differ
from earlier releases.
</dd>
Expand All @@ -34,6 +33,14 @@ from the WiredTiger release; remaining compression engines include LZ4,
snappy and zlib.
</dd>

<dt>Change to named checkpoints with bulk loads</dt>
<dd>
Previous versions of WiredTiger created empty named checkpoints in files
being bulk-loaded. In this release, checkpoints skip files being
bulk-loaded, so those files do not get named checkpoints that complete
during the bulk load.
</dd>

</dl><hr>
@section version_270 Upgrading to Version 2.7.0

Expand Down
1 change: 1 addition & 0 deletions src/include/connection.h
Expand Up @@ -175,6 +175,7 @@ struct __wt_connection_impl {
WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */
WT_SPINLOCK dhandle_lock; /* Data handle list spinlock */
WT_SPINLOCK fh_lock; /* File handle queue spinlock */
WT_SPINLOCK metadata_lock; /* Metadata update spinlock */
WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */
WT_SPINLOCK schema_lock; /* Schema operation spinlock */
WT_SPINLOCK table_lock; /* Table creation spinlock */
Expand Down
2 changes: 0 additions & 2 deletions src/include/extern.h
Expand Up @@ -253,8 +253,6 @@ extern int __wt_conn_dhandle_find( WT_SESSION_IMPL *session, const char *uri, co
extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force);
extern int __wt_conn_btree_open( WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags);
extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, bool apply_checkpoints, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
extern int __wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
extern int __wt_conn_btree_apply_single(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *uri, bool force);
extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool final, bool force);
extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session);
Expand Down
29 changes: 15 additions & 14 deletions src/include/flags.h
Expand Up @@ -55,20 +55,21 @@
#define WT_SESSION_INTERNAL 0x00000004
#define WT_SESSION_LOCKED_CHECKPOINT 0x00000008
#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000010
#define WT_SESSION_LOCKED_SCHEMA 0x00000020
#define WT_SESSION_LOCKED_SLOT 0x00000040
#define WT_SESSION_LOCKED_TABLE 0x00000080
#define WT_SESSION_LOCKED_TURTLE 0x00000100
#define WT_SESSION_LOCK_NO_WAIT 0x00000200
#define WT_SESSION_LOGGING_INMEM 0x00000400
#define WT_SESSION_LOOKASIDE_CURSOR 0x00000800
#define WT_SESSION_NO_CACHE 0x00001000
#define WT_SESSION_NO_DATA_HANDLES 0x00002000
#define WT_SESSION_NO_EVICTION 0x00004000
#define WT_SESSION_NO_LOGGING 0x00008000
#define WT_SESSION_NO_SCHEMA_LOCK 0x00010000
#define WT_SESSION_QUIET_CORRUPT_FILE 0x00020000
#define WT_SESSION_SERVER_ASYNC 0x00040000
#define WT_SESSION_LOCKED_METADATA 0x00000020
#define WT_SESSION_LOCKED_SCHEMA 0x00000040
#define WT_SESSION_LOCKED_SLOT 0x00000080
#define WT_SESSION_LOCKED_TABLE 0x00000100
#define WT_SESSION_LOCKED_TURTLE 0x00000200
#define WT_SESSION_LOCK_NO_WAIT 0x00000400
#define WT_SESSION_LOGGING_INMEM 0x00000800
#define WT_SESSION_LOOKASIDE_CURSOR 0x00001000
#define WT_SESSION_NO_CACHE 0x00002000
#define WT_SESSION_NO_DATA_HANDLES 0x00004000
#define WT_SESSION_NO_EVICTION 0x00008000
#define WT_SESSION_NO_LOGGING 0x00010000
#define WT_SESSION_NO_SCHEMA_LOCK 0x00020000
#define WT_SESSION_QUIET_CORRUPT_FILE 0x00040000
#define WT_SESSION_SERVER_ASYNC 0x00080000
#define WT_TXN_LOG_CKPT_CLEANUP 0x00000001
#define WT_TXN_LOG_CKPT_PREPARE 0x00000002
#define WT_TXN_LOG_CKPT_START 0x00000004
Expand Down
8 changes: 8 additions & 0 deletions src/include/schema.h
Expand Up @@ -132,6 +132,14 @@ struct __wt_table {
WT_WITH_LOCK_WAIT(session, \
&S2C(session)->dhandle_lock, WT_SESSION_LOCKED_HANDLE_LIST, op)

/*
 * WT_WITH_METADATA_LOCK --
 * Acquire the metadata lock, perform an operation, drop the lock.
 *
 * Expands to WT_WITH_LOCK on the connection's metadata_lock, passing
 * WT_SESSION_LOCKED_METADATA as the session flag; "ret" and "op" are
 * forwarded unchanged. NOTE(review): how "ret" is set when the lock
 * cannot be acquired is decided by WT_WITH_LOCK, which is not shown
 * here -- confirm before relying on it.
 */
#define WT_WITH_METADATA_LOCK(session, ret, op) \
WT_WITH_LOCK(session, ret, \
&S2C(session)->metadata_lock, WT_SESSION_LOCKED_METADATA, op)

/*
* WT_WITH_SCHEMA_LOCK --
* Acquire the schema lock, perform an operation, drop the lock.
Expand Down
5 changes: 1 addition & 4 deletions src/include/session.h
Expand Up @@ -127,10 +127,7 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
int (*block_manager_cleanup)(WT_SESSION_IMPL *);

/* Checkpoint support */
struct {
WT_DATA_HANDLE *dhandle;
const char *name;
} *ckpt_handle; /* Handle list */
WT_DATA_HANDLE **ckpt_handle; /* Handle list */
u_int ckpt_handle_next; /* Next empty slot */
size_t ckpt_handle_allocated; /* Bytes allocated */

Expand Down
21 changes: 17 additions & 4 deletions src/lsm/lsm_work_unit.c
Expand Up @@ -334,14 +334,27 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
/*
* Turn on metadata tracking to ensure the checkpoint gets the
* necessary handle locks.
*
* Ensure that we don't race with a running checkpoint: the checkpoint
* lock protects against us racing with an application checkpoint in
* this chunk. Don't wait for it, though: checkpoints can take a long
* time, and our checkpoint operation should be very quick.
*/
WT_ERR(__wt_meta_track_on(session));
WT_WITH_SCHEMA_LOCK(session, ret,
ret = __wt_schema_worker(
session, chunk->uri, __wt_checkpoint, NULL, NULL, 0));
F_SET(session, WT_SESSION_LOCK_NO_WAIT);
WT_WITH_CHECKPOINT_LOCK(session, ret,
WT_WITH_SCHEMA_LOCK(session, ret,
ret = __wt_schema_worker(
session, chunk->uri, __wt_checkpoint, NULL, NULL, 0)));
WT_TRET(__wt_meta_track_off(session, false, ret != 0));
if (ret != 0)
F_CLR(session, WT_SESSION_LOCK_NO_WAIT);
if (ret != 0) {
if (ret == EBUSY) {
ret = 0;
goto err;
}
WT_ERR_MSG(session, ret, "LSM checkpoint");
}

/* Now the file is written, get the chunk size. */
WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk));
Expand Down

0 comments on commit 8b996c0

Please sign in to comment.