Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Fix races in table-level schema operations.

closes #191
  • Loading branch information...
commit 4f05e1298898d51febaeaa3ab9e20de4bb2babf8 1 parent 00088cc
Michael Cahill authored
View
8 src/conn/conn_btree.c
@@ -335,8 +335,10 @@ __wt_conn_btree_close(WT_SESSION_IMPL *session, int locked)
*/
__wt_spin_lock(session, &conn->spinlock);
inuse = --btree->refcnt > 0;
- if (!inuse && !locked)
+ if (!inuse && !locked) {
__wt_writelock(session, btree->rwlock);
+ F_SET(btree, WT_BTREE_EXCLUSIVE);
+ }
__wt_spin_unlock(session, &conn->spinlock);
if (!inuse) {
@@ -350,8 +352,10 @@ __wt_conn_btree_close(WT_SESSION_IMPL *session, int locked)
if (F_ISSET(btree, WT_BTREE_OPEN))
WT_TRET(__wt_conn_btree_sync_and_close(session));
- if (!locked)
+ if (!locked) {
+ F_CLR(btree, WT_BTREE_EXCLUSIVE);
__wt_rwunlock(session, btree->rwlock);
+ }
}
return (ret);
View
4 src/conn/conn_handle.c
@@ -31,6 +31,9 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
/* File handle spinlock. */
__wt_spin_init(session, &conn->fh_lock);
+ /* Schema operation spinlock. */
+ __wt_spin_init(session, &conn->schema_lock);
+
/* Serialized function call spinlock. */
__wt_spin_init(session, &conn->serial_lock);
@@ -76,6 +79,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
__wt_spin_destroy(session, &conn->fh_lock);
__wt_spin_destroy(session, &conn->serial_lock);
+ __wt_spin_destroy(session, &conn->schema_lock);
__wt_spin_destroy(session, &conn->spinlock);
if (conn->ckpt_rwlock != NULL)
View
2  src/cursor/cur_file.c
@@ -290,7 +290,7 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
WT_RET(__wt_session_get_btree(session,
uri, cfg, bulk ? WT_BTREE_EXCLUSIVE : 0));
else
- return (EINVAL);
+ WT_RET_MSG(session, EINVAL, "Unexpected object type");
return (__wt_curfile_create(session, owner, cfg, cursorp));
}
View
1  src/include/api.h
@@ -185,6 +185,7 @@ struct __wt_connection_impl {
WT_SESSION_IMPL dummy_session;
WT_SPINLOCK fh_lock; /* File handle queue spinlock */
+ WT_SPINLOCK schema_lock; /* Schema operation spinlock */
WT_SPINLOCK serial_lock; /* Serial function call spinlock */
WT_SPINLOCK spinlock; /* General purpose spinlock */
View
41 src/meta/meta_table.c
@@ -46,8 +46,7 @@ __wt_metadata_open(WT_SESSION_IMPL *session)
WT_ASSERT(session, session->metafile != NULL);
/* The metafile doesn't need to stay locked -- release it. */
- WT_RET(__wt_session_release_btree(session));
- return (0);
+ return (__wt_session_release_btree(session));
}
/*
@@ -58,12 +57,20 @@ int
__wt_metadata_cursor(
WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp)
{
+ WT_BTREE *saved_btree;
+ WT_DECL_RET;
const char *cfg[] = API_CONF_DEFAULTS(session, open_cursor, config);
- WT_RET(__wt_metadata_open(session));
+ saved_btree = session->btree;
+ WT_ERR(__wt_metadata_open(session));
+
session->btree = session->metafile;
- WT_RET(__wt_session_lock_btree(session, 0));
- return (__wt_curfile_create(session, NULL, cfg, cursorp));
+ WT_ERR(__wt_session_lock_btree(session, 0));
+ ret = __wt_curfile_create(session, NULL, cfg, cursorp);
+
+ /* Restore the caller's btree. */
+err: session->btree = saved_btree;
+ return (ret);
}
/*
@@ -74,7 +81,6 @@ int
__wt_metadata_insert(
WT_SESSION_IMPL *session, const char *key, const char *value)
{
- WT_BTREE *btree;
WT_CURSOR *cursor;
WT_DECL_RET;
@@ -82,8 +88,6 @@ __wt_metadata_insert(
WT_RET_MSG(session, EINVAL,
"%s: insert not supported on the turtle file", key);
- /* Save the caller's btree: the metadata cursor will overwrite it. */
- btree = session->btree;
WT_ERR(__wt_metadata_cursor(session, NULL, &cursor));
cursor->set_key(cursor, key);
cursor->set_value(cursor, value);
@@ -92,9 +96,7 @@ __wt_metadata_insert(
ret = __wt_meta_track_insert(session, key);
WT_TRET(cursor->close(cursor));
- /* Restore the caller's btree. */
-err: session->btree = btree;
- return (ret);
+err: return (ret);
}
/*
@@ -105,7 +107,6 @@ int
__wt_metadata_update(
WT_SESSION_IMPL *session, const char *key, const char *value)
{
- WT_BTREE *btree;
WT_CURSOR *cursor;
WT_DECL_RET;
@@ -115,16 +116,12 @@ __wt_metadata_update(
if (WT_META_TRACKING(session))
WT_RET(__wt_meta_track_update(session, key));
- /* Save the caller's btree: the metadata cursor will overwrite it. */
- btree = session->btree;
WT_RET(__wt_metadata_cursor(session, "overwrite", &cursor));
cursor->set_key(cursor, key);
cursor->set_value(cursor, value);
WT_TRET(cursor->insert(cursor));
WT_TRET(cursor->close(cursor));
- /* Restore the caller's btree. */
- session->btree = btree;
return (ret);
}
@@ -135,7 +132,6 @@ __wt_metadata_update(
int
__wt_metadata_remove(WT_SESSION_IMPL *session, const char *key)
{
- WT_BTREE *btree;
WT_CURSOR *cursor;
WT_DECL_RET;
@@ -143,8 +139,6 @@ __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key)
WT_RET_MSG(session, EINVAL,
"%s: remove not supported on the turtle file", key);
- /* Save the caller's btree: the metadata cursor will overwrite it. */
- btree = session->btree;
WT_RET(__wt_metadata_cursor(session, NULL, &cursor));
cursor->set_key(cursor, key);
WT_TRET(cursor->search(cursor));
@@ -155,8 +149,6 @@ __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key)
}
WT_TRET(cursor->close(cursor));
- /* Restore the caller's btree. */
- session->btree = btree;
return (ret);
}
@@ -169,7 +161,6 @@ int
__wt_metadata_read(
WT_SESSION_IMPL *session, const char *key, const char **valuep)
{
- WT_BTREE *btree;
WT_CURSOR *cursor;
WT_DECL_RET;
const char *value;
@@ -177,16 +168,12 @@ __wt_metadata_read(
if (__metadata_turtle(key))
return (__wt_meta_turtle_read(session, key, valuep));
- /* Save the caller's btree: the metadata cursor will overwrite it. */
- btree = session->btree;
WT_RET(__wt_metadata_cursor(session, NULL, &cursor));
cursor->set_key(cursor, key);
WT_ERR(cursor->search(cursor));
WT_ERR(cursor->get_value(cursor, &value));
WT_ERR(__wt_strdup(session, value, valuep));
-err: WT_TRET(cursor->close(cursor));
- /* Restore the caller's btree. */
- session->btree = btree;
+err: WT_TRET(cursor->close(cursor));
return (ret);
}
View
16 src/schema/schema_create.c
@@ -170,6 +170,7 @@ __create_colgroup(WT_SESSION_IMPL *session,
WT_ERR(__wt_buf_fmt(session, &uribuf, "file:%s", filename));
fileuri = uribuf.data;
+ WT_ERR(__create_file(session, fileuri, exclusive, fileconf));
if ((ret = __wt_metadata_insert(session, name, cgconf)) != 0) {
/*
* If the entry already exists in the metadata, we're done.
@@ -179,10 +180,9 @@ __create_colgroup(WT_SESSION_IMPL *session,
ret = exclusive ? EEXIST : 0;
goto err;
}
- WT_ERR(__create_file(session, fileuri, exclusive, fileconf));
WT_ERR(__wt_schema_open_colgroups(session, table));
-err: __wt_free(session, cgconf);
+err: __wt_free(session, cgconf);
__wt_free(session, fileconf);
__wt_free(session, oldconf);
__wt_buf_free(session, &fmt);
@@ -279,6 +279,7 @@ __create_index(WT_SESSION_IMPL *session,
WT_ERR(__wt_buf_fmt(session, &uribuf, "file:%s", filename));
fileuri = uribuf.data;
+ WT_ERR(__create_file(session, fileuri, exclusive, fileconf));
if ((ret = __wt_metadata_insert(session, name, idxconf)) != 0) {
/*
* If the entry already exists in the metadata, we're done.
@@ -288,7 +289,6 @@ __create_index(WT_SESSION_IMPL *session,
ret = exclusive ? EEXIST : 0;
goto err;
}
- WT_ERR(__create_file(session, fileuri, exclusive, fileconf));
err: __wt_free(session, fileconf);
__wt_free(session, idxconf);
@@ -340,7 +340,15 @@ __create_table(WT_SESSION_IMPL *session,
return (ret);
WT_RET(__wt_config_collapse(session, cfg, &tableconf));
- WT_ERR(__wt_metadata_insert(session, name, tableconf));
+ if ((ret = __wt_metadata_insert(session, name, tableconf)) != 0) {
+ /*
+ * If the entry already exists in the metadata, we're done.
+ * This is an error for exclusive creates but okay otherwise.
+ */
+ if (ret == WT_DUPLICATE_KEY)
+ ret = exclusive ? EEXIST : 0;
+ goto err;
+ }
/* Attempt to open the table now to catch any errors. */
WT_ERR(__wt_schema_get_table(
View
20 src/schema/schema_drop.c
@@ -24,8 +24,8 @@ __drop_file(
return (EINVAL);
if (session->btree == NULL &&
- (ret = __wt_session_get_btree(
- session, uri, cfg, WT_BTREE_EXCLUSIVE | WT_BTREE_LOCK_ONLY)) != 0) {
+ (ret = __wt_session_get_btree(session, uri, cfg,
+ WT_BTREE_EXCLUSIVE | WT_BTREE_LOCK_ONLY)) != 0) {
if (ret == WT_NOTFOUND || ret == ENOENT)
ret = 0;
return (ret);
@@ -115,8 +115,8 @@ __drop_colgroup(
* Try to get the btree handle. It will be unlocked by
* __wt_conn_btree_close_all.
*/
- if ((ret = __wt_schema_get_btree(
- session, uri, strlen(uri), cfg, WT_BTREE_EXCLUSIVE)) != 0) {
+ if ((ret = __wt_schema_get_btree(session, uri, strlen(uri), cfg,
+ WT_BTREE_EXCLUSIVE | WT_BTREE_LOCK_ONLY)) != 0) {
if (ret == WT_NOTFOUND || ret == ENOENT)
ret = 0;
return (ret);
@@ -158,8 +158,8 @@ __drop_index(
* Try to get the btree handle. It will be unlocked by
* __wt_conn_btree_close_all.
*/
- if ((ret = __wt_schema_get_btree(
- session, uri, strlen(uri), cfg, WT_BTREE_EXCLUSIVE)) != 0) {
+ if ((ret = __wt_schema_get_btree(session, uri, strlen(uri), cfg,
+ WT_BTREE_EXCLUSIVE | WT_BTREE_LOCK_ONLY)) != 0) {
if (ret == WT_NOTFOUND || ret == ENOENT)
ret = 0;
return (ret);
@@ -199,22 +199,22 @@ __drop_table(
for (i = 0; i < WT_COLGROUPS(table); i++) {
if (table->cg_name[i] == NULL)
continue;
- WT_TRET(__drop_colgroup(
+ WT_ERR(__drop_colgroup(
session, table->cg_name[i], force, cfg));
}
/* Drop the indices. */
- WT_TRET(__wt_schema_open_index(session, table, NULL, 0));
+ WT_ERR(__wt_schema_open_index(session, table, NULL, 0));
for (i = 0; i < table->nindices; i++) {
if (table->idx_name[i] == NULL)
continue;
WT_TRET(__drop_index(session, table->idx_name[i], force, cfg));
}
- WT_TRET(__wt_schema_remove_table(session, table));
+ WT_ERR(__wt_schema_remove_table(session, table));
/* Remove the metadata entry (ignore missing items). */
- WT_TRET(__wt_metadata_remove(session, uri));
+ WT_ERR(__wt_metadata_remove(session, uri));
err: if (force && ret == WT_NOTFOUND)
ret = 0;
View
5 src/schema/schema_open.c
@@ -242,8 +242,10 @@ __open_index(WT_SESSION_IMPL *session, WT_TABLE *table,
err: __wt_buf_free(session, &cols);
__wt_buf_free(session, &uribuf);
- if (session->btree != NULL)
+ if (btree != NULL) {
+ session->btree = btree;
WT_TRET(__wt_session_release_btree(session));
+ }
return (ret);
}
@@ -401,7 +403,6 @@ __wt_schema_open_table(WT_SESSION_IMPL *session,
WT_ERR(__wt_calloc_def(session, WT_COLGROUPS(table), &table->cg_name));
WT_ERR(__wt_schema_open_colgroups(session, table));
-
*tablep = table;
if (0) {
View
5 src/schema/schema_truncate.c
@@ -45,11 +45,6 @@ __truncate_table(WT_SESSION_IMPL *session, const char *name)
WT_RET(__wt_schema_get_table(session, name, strlen(name), &table));
WT_RET(__wt_scr_alloc(session, 0, &namebuf));
- /*
- * We are closing the column groups, they must be reopened for future
- * accesses to the table.
- */
- table->cg_complete = 0;
/* Truncate the column groups. */
for (i = 0; i < WT_COLGROUPS(table); i++) {
View
1  src/schema/schema_worker.c
@@ -38,6 +38,7 @@ __wt_schema_worker(WT_SESSION_IMPL *session,
} else if (WT_PREFIX_SKIP(tablename, "table:")) {
WT_RET(__wt_schema_get_table(session,
tablename, strlen(tablename), &table));
+ WT_ASSERT(session, session->btree == NULL);
for (i = 0; i < WT_COLGROUPS(table); i++) {
cgname = table->cg_name[i];
View
24 src/session/session_api.c
@@ -178,8 +178,9 @@ __session_create(WT_SESSION *wt_session, const char *uri, const char *config)
/* Disallow objects in the WiredTiger name space. */
WT_ERR(__wt_schema_name_check(session, uri));
-
- WT_ERR(__wt_schema_create(session, uri, config));
+ __wt_spin_lock(session, &S2C(session)->schema_lock);
+ ret = __wt_schema_create(session, uri, config);
+ __wt_spin_unlock(session, &S2C(session)->schema_lock);
err: API_END_NOTFOUND_MAP(session, ret);
}
@@ -197,9 +198,12 @@ __session_rename(WT_SESSION *wt_session,
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, rename, config, cfg);
+
+ WT_ERR(__wt_meta_track_on(session));
ret = __wt_schema_rename(session, uri, newname, cfg);
-err: API_END_NOTFOUND_MAP(session, ret);
+err: WT_TRET(__wt_meta_track_off(session, ret != 0));
+ API_END_NOTFOUND_MAP(session, ret);
}
/*
@@ -220,9 +224,11 @@ __session_drop(WT_SESSION *wt_session, const char *uri, const char *config)
/* Dropping snapshots is a different code path. */
WT_ERR(__wt_config_gets(session, cfg, "snapshot", &cval));
+ __wt_spin_lock(session, &S2C(session)->schema_lock);
ret = (cval.len == 0) ? __wt_schema_drop(session, uri, cfg) :
__wt_schema_worker(
session, uri, cfg, __wt_snapshot_drop, WT_BTREE_SNAPSHOT_OP);
+ __wt_spin_unlock(session, &S2C(session)->schema_lock);
err: /* Note: drop operations cannot be unrolled (yet?). */
WT_TRET(__wt_meta_track_off(session, 0));
@@ -241,8 +247,10 @@ __session_dumpfile(WT_SESSION *wt_session, const char *uri, const char *config)
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, dumpfile, config, cfg);
+ __wt_spin_lock(session, &S2C(session)->schema_lock);
ret = __wt_schema_worker(session, uri, cfg,
__wt_dumpfile, WT_BTREE_EXCLUSIVE | WT_BTREE_VERIFY);
+ __wt_spin_unlock(session, &S2C(session)->schema_lock);
err: API_END_NOTFOUND_MAP(session, ret);
}
@@ -260,8 +268,10 @@ __session_salvage(WT_SESSION *wt_session, const char *uri, const char *config)
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, salvage, config, cfg);
+ __wt_spin_lock(session, &S2C(session)->schema_lock);
ret = __wt_schema_worker(session, uri, cfg,
__wt_salvage, WT_BTREE_EXCLUSIVE | WT_BTREE_SALVAGE);
+ __wt_spin_unlock(session, &S2C(session)->schema_lock);
err: API_END_NOTFOUND_MAP(session, ret);
}
@@ -281,10 +291,12 @@ __session_sync(WT_SESSION *wt_session, const char *uri, const char *config)
SESSION_API_CALL(session, sync, config, cfg);
WT_ERR(__wt_meta_track_on(session));
+ __wt_spin_lock(session, &S2C(session)->schema_lock);
ret = __wt_schema_worker(
session, uri, cfg, __wt_snapshot, WT_BTREE_SNAPSHOT_OP);
+ __wt_spin_unlock(session, &S2C(session)->schema_lock);
-err: WT_TRET(__wt_meta_track_off(session, ret != 0));
+err: WT_TRET(__wt_meta_track_off(session, ret != 0));
API_END_NOTFOUND_MAP(session, ret);
}
@@ -369,8 +381,10 @@ __session_upgrade(WT_SESSION *wt_session, const char *uri, const char *config)
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, upgrade, config, cfg);
+ __wt_spin_lock(session, &S2C(session)->schema_lock);
ret = __wt_schema_worker(session, uri, cfg,
__wt_upgrade, WT_BTREE_EXCLUSIVE | WT_BTREE_UPGRADE);
+ __wt_spin_unlock(session, &S2C(session)->schema_lock);
err: API_END_NOTFOUND_MAP(session, ret);
}
@@ -388,8 +402,10 @@ __session_verify(WT_SESSION *wt_session, const char *uri, const char *config)
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, verify, config, cfg);
+ __wt_spin_lock(session, &S2C(session)->schema_lock);
ret = __wt_schema_worker(session, uri, cfg,
__wt_verify, WT_BTREE_EXCLUSIVE | WT_BTREE_VERIFY);
+ __wt_spin_unlock(session, &S2C(session)->schema_lock);
err: API_END_NOTFOUND_MAP(session, ret);
}
View
9 src/session/session_btree.c
@@ -116,6 +116,7 @@ __wt_session_release_btree(WT_SESSION_IMPL *session)
F_CLR(btree, WT_BTREE_EXCLUSIVE);
__wt_rwunlock(session, btree->rwlock);
+ session->btree = NULL;
return (ret);
}
@@ -171,8 +172,12 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
return (0);
if ((ret =
- __wt_session_lock_btree(session, flags)) != WT_NOTFOUND)
+ __wt_session_lock_btree(session, flags)) != WT_NOTFOUND) {
+ WT_ASSERT(session, ret != 0 ||
+ LF_ISSET(WT_BTREE_EXCLUSIVE) ==
+ F_ISSET(session->btree, WT_BTREE_EXCLUSIVE));
return (ret);
+ }
ret = 0;
}
@@ -183,6 +188,8 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
WT_ASSERT(session, LF_ISSET(WT_BTREE_LOCK_ONLY) ||
F_ISSET(session->btree, WT_BTREE_OPEN));
+ WT_ASSERT(session, LF_ISSET(WT_BTREE_EXCLUSIVE) ==
+ F_ISSET(session->btree, WT_BTREE_EXCLUSIVE));
return (0);
}
View
25 test/fops/t.c
@@ -28,6 +28,7 @@ main(int argc, char *argv[])
u_int nthreads;
int ch, cnt, runs;
char *config_open;
+ const char **objp, *objs[] = { "file:__wt", "table:__wt", NULL };
if ((progname = strrchr(argv[0], '/')) == NULL)
progname = argv[0];
@@ -76,21 +77,15 @@ main(int argc, char *argv[])
for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) {
shutdown(); /* Clean up previous runs */
- uri = "file:__wt";
- printf(" %d: %u threads on %s\n", cnt, nthreads, uri);
- wt_startup(config_open);
- if (fop_start(nthreads))
- return (EXIT_FAILURE);
- wt_shutdown();
- printf("\n");
-
- uri = "table:__wt";
- printf(" %d: %u threads on %s\n", cnt, nthreads, uri);
- wt_startup(config_open);
- if (fop_start(nthreads))
- return (EXIT_FAILURE);
- wt_shutdown();
- printf("\n");
+ for (objp = objs; *objp != NULL; objp++) {
+ uri = *objp;
+ printf("%5d: %u threads on %s\n", cnt, nthreads, uri);
+ wt_startup(config_open);
+ if (fop_start(nthreads))
+ return (EXIT_FAILURE);
+ wt_shutdown();
+ printf("\n");
+ }
}
return (0);
}
Please sign in to comment.
Something went wrong with that request. Please try again.