Skip to content

Commit

Permalink
Illumos #3875
Browse files Browse the repository at this point in the history
3875 panic in zfs_root() after failed rollback
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Gordon Ross <gwr@nexenta.com>

References:
  https://www.illumos.org/issues/3875
  illumos/illumos-gate@91948b5

Ported-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1775
  • Loading branch information
Keith M Wesolowski authored and behlendorf committed Nov 4, 2013
1 parent 1958067 commit 831baf0
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 70 deletions.
1 change: 1 addition & 0 deletions include/sys/dmu_objset.h
Expand Up @@ -136,6 +136,7 @@ struct objset {
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp);
void dmu_objset_refresh_ownership(objset_t *os, void *tag);
void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
Expand Down
3 changes: 2 additions & 1 deletion include/sys/dmu_send.h
Expand Up @@ -55,12 +55,13 @@ typedef struct dmu_recv_cookie {
struct avl_tree *drc_guid_to_ds_map;
zio_cksum_t drc_cksum;
uint64_t drc_newsnapobj;
void *drc_owner;
} dmu_recv_cookie_t;

int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
boolean_t force, char *origin, dmu_recv_cookie_t *drc);
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp,
int cleanup_fd, uint64_t *action_handlep);
int dmu_recv_end(dmu_recv_cookie_t *drc);
int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner);

#endif /* _DMU_SEND_H */
4 changes: 2 additions & 2 deletions include/sys/dsl_dataset.h
Expand Up @@ -248,7 +248,7 @@ void dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag);
boolean_t dsl_dataset_long_held(dsl_dataset_t *ds);

int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
dsl_dataset_t *origin_head, boolean_t force);
dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx);
void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
dsl_dataset_t *origin_head, dmu_tx_t *tx);
int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
Expand All @@ -265,7 +265,7 @@ int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name,
int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx);
void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
zprop_source_t source, uint64_t value, dmu_tx_t *tx);
int dsl_dataset_rollback(const char *fsname);
int dsl_dataset_rollback(const char *fsname, void *owner);

#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \
Expand Down
32 changes: 32 additions & 0 deletions module/zfs/dmu_objset.c
Expand Up @@ -517,6 +517,38 @@ dmu_objset_rele(objset_t *os, void *tag)
dsl_pool_rele(dp, tag);
}

/*
* Release and reacquire ownership of the dataset backing 'os'.
*
* When we are called, os MUST refer to an objset associated with a dataset
* that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
* == tag. We will then release and reacquire ownership of the dataset while
* holding the pool config_rwlock so that no intervening namespace or ownership
* changes can occur.
*
* This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
* release the hold on its dataset and acquire a new one on the dataset of the
* same name so that it can be partially torn down and reconstructed.
*/
void
dmu_objset_refresh_ownership(objset_t *os, void *tag)
{
dsl_pool_t *dp;
dsl_dataset_t *ds, *newds;
char name[MAXNAMELEN];

/* Enforce the preconditions described above before touching anything. */
ds = os->os_dsl_dataset;
VERIFY3P(ds, !=, NULL);
VERIFY3P(ds->ds_owner, ==, tag);
VERIFY(dsl_dataset_long_held(ds));

/* Capture the name first: ownership is reacquired by name below. */
dsl_dataset_name(ds, name);
dp = dmu_objset_pool(os);
/* Disown and re-own inside a single pool config critical section. */
dsl_pool_config_enter(dp, FTAG);
dmu_objset_disown(os, tag);
VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
/* The re-owned dataset must be the one the objset still refers to. */
VERIFY3P(newds, ==, os->os_dsl_dataset);
dsl_pool_config_exit(dp, FTAG);
}

void
dmu_objset_disown(objset_t *os, void *tag)
{
Expand Down
9 changes: 7 additions & 2 deletions module/zfs/dmu_send.c
Expand Up @@ -1612,7 +1612,7 @@ dmu_recv_end_check(void *arg, dmu_tx_t *tx)
}
}
error = dsl_dataset_clone_swap_check_impl(drc->drc_ds,
origin_head, drc->drc_force);
origin_head, drc->drc_force, drc->drc_owner, tx);
if (error != 0) {
dsl_dataset_rele(origin_head, FTAG);
return (error);
Expand Down Expand Up @@ -1685,6 +1685,9 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)

dsl_dataset_rele(origin_head, FTAG);
dsl_destroy_head_sync_impl(drc->drc_ds, tx);

if (drc->drc_owner != NULL)
VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner);
} else {
dsl_dataset_t *ds = drc->drc_ds;

Expand Down Expand Up @@ -1787,8 +1790,10 @@ dmu_recv_new_end(dmu_recv_cookie_t *drc)
}

int
dmu_recv_end(dmu_recv_cookie_t *drc)
dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
{
drc->drc_owner = owner;

if (drc->drc_newfs)
return (dmu_recv_new_end(drc));
else
Expand Down
72 changes: 62 additions & 10 deletions module/zfs/dsl_dataset.c
Expand Up @@ -1669,16 +1669,52 @@ dsl_dataset_rename_snapshot(const char *fsname,
dsl_dataset_rename_snapshot_sync, &ddrsa, 1));
}

/*
 * Check whether ownership of a dataset can be handed off, i.e. that nobody
 * other than 'owner' has a long hold on it.
 *
 * The check is only performed in syncing context; otherwise success is
 * reported without looking at the dataset. Restricting the work to syncing
 * context avoids the dataset unexpectedly going away while the owner's long
 * hold is briefly released. Nothing is changed permanently: the owner's long
 * hold is dropped just long enough to see whether any other long hold
 * remains, and is then taken again.
 *
 * If owner is NULL no hold is dropped, so any long hold counts as busy.
 *
 * Returns 0 if the handoff may proceed, or EBUSY if a long hold remains.
 */
static int
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
{
	boolean_t busy;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	if (owner == NULL) {
		busy = dsl_dataset_long_held(ds);
	} else {
		/* Look past the owner's own long hold, then restore it. */
		VERIFY3P(ds->ds_owner, ==, owner);
		dsl_dataset_long_rele(ds, owner);
		busy = dsl_dataset_long_held(ds);
		dsl_dataset_long_hold(ds, owner);
	}

	if (!busy)
		return (0);

	return (SET_ERROR(EBUSY));
}

/* Argument bundle handed to the rollback check and sync callbacks. */
typedef struct dsl_dataset_rollback_arg {
const char *ddra_fsname; /* name of the dataset to roll back */
void *ddra_owner; /* owner passed to the handoff check; may be NULL */
} dsl_dataset_rollback_arg_t;

static int
dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
{
const char *fsname = arg;
dsl_dataset_rollback_arg_t *ddra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
int64_t unused_refres_delta;
int error;

error = dsl_dataset_hold(dp, fsname, FTAG, &ds);
error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
if (error != 0)
return (error);

Expand All @@ -1694,9 +1730,10 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
return (SET_ERROR(EINVAL));
}

if (dsl_dataset_long_held(ds)) {
error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EBUSY));
return (error);
}

/*
Expand Down Expand Up @@ -1733,12 +1770,12 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
static void
dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
{
const char *fsname = arg;
dsl_dataset_rollback_arg_t *ddra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds, *clone;
uint64_t cloneobj;

VERIFY0(dsl_dataset_hold(dp, fsname, FTAG, &ds));
VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds));

cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx);
Expand All @@ -1754,11 +1791,26 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_rele(ds, FTAG);
}

/*
* If owner != NULL:
*
* - The existing dataset MUST be owned by the specified owner at entry
* - Upon return, dataset will still be held by the same owner, whether we
* succeed or not.
*
* This mode is required any time the existing filesystem is mounted. See
* notes above zfs_suspend_fs() for further details.
*/
int
dsl_dataset_rollback(const char *fsname)
dsl_dataset_rollback(const char *fsname, void *owner)
{
dsl_dataset_rollback_arg_t ddra;

ddra.ddra_fsname = fsname;
ddra.ddra_owner = owner;

return (dsl_sync_task(fsname, dsl_dataset_rollback_check,
dsl_dataset_rollback_sync, (void *)fsname, 1));
dsl_dataset_rollback_sync, (void *)&ddra, 1));
}

struct promotenode {
Expand Down Expand Up @@ -2276,7 +2328,7 @@ dsl_dataset_promote(const char *name, char *conflsnap)

int
dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
dsl_dataset_t *origin_head, boolean_t force)
dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
{
int64_t unused_refres_delta;

Expand Down Expand Up @@ -2305,7 +2357,7 @@ dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
return (SET_ERROR(ETXTBSY));

/* origin_head should have no long holds other than the owner's, if given */
if (dsl_dataset_long_held(origin_head))
if (dsl_dataset_handoff_check(origin_head, owner, tx))
return (SET_ERROR(EBUSY));

/* check amount of any unconsumed refreservation */
Expand Down
15 changes: 9 additions & 6 deletions module/zfs/zfs_ioctl.c
Expand Up @@ -1349,7 +1349,7 @@ zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer)
/*
* XXX we could probably try again, since the unmounting
* thread should be just about to disassociate the
* objset from the zfsvfs.
* objset from the zsb.
*/
rrw_exit(&(*zsbp)->z_teardown_lock, tag);
return (SET_ERROR(EBUSY));
Expand Down Expand Up @@ -3504,13 +3504,13 @@ zfs_ioc_rollback(zfs_cmd_t *zc)
if (error == 0) {
int resume_err;

error = dsl_dataset_rollback(zc->zc_name);
error = dsl_dataset_rollback(zc->zc_name, zsb);
resume_err = zfs_resume_fs(zsb, zc->zc_name);
error = error ? error : resume_err;
}
deactivate_super(zsb->z_sb);
} else {
error = dsl_dataset_rollback(zc->zc_name);
error = dsl_dataset_rollback(zc->zc_name, NULL);
}
return (error);
}
Expand Down Expand Up @@ -4038,13 +4038,13 @@ zfs_ioc_recv(zfs_cmd_t *zc)
* If the suspend fails, then the recv_end will
* likely also fail, and clean up after itself.
*/
end_err = dmu_recv_end(&drc);
end_err = dmu_recv_end(&drc, zsb);
if (error == 0)
error = zfs_resume_fs(zsb, tofs);
error = error ? error : end_err;
deactivate_super(zsb->z_sb);
} else {
error = dmu_recv_end(&drc);
error = dmu_recv_end(&drc, NULL);
}
}

Expand Down Expand Up @@ -4528,8 +4528,11 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
* objset_phys_t). Suspend/resume the fs will do that.
*/
error = zfs_suspend_fs(zsb);
if (error == 0)
if (error == 0) {
dmu_objset_refresh_ownership(zsb->z_os,
zsb);
error = zfs_resume_fs(zsb, zc->zc_name);
}
}
if (error == 0)
error = dmu_objset_userspace_upgrade(zsb->z_os);
Expand Down

0 comments on commit 831baf0

Please sign in to comment.