Skip to content

Commit

Permalink
Add support for asynchronous zvol minor operations
Browse files Browse the repository at this point in the history
zfsonlinux issue #2217 - zvol minor operations: check snapdev
property before traversing snapshots of a dataset

zfsonlinux issue #3681 - lock order inversion between zvol_open()
and dsl_pool_sync()...zvol_rename_minors()

Create a per-pool zvol taskq for asynchronous zvol tasks.
There are a few key design decisions to be aware of.

* Each taskq must be single threaded to ensure tasks are always
  processed in the order in which they were dispatched.

* There is a taskq per-pool in order to keep the pools independent.
  This way if one pool is suspended it will not impact another.

* The preferred location to dispatch a zvol minor task is a sync
  task.  In this context there is easy access to the spa_t and
  minimal error handling is required because the sync task must
  succeed.

Support for asynchronous zvol minor operations address issue #3681.

Signed-off-by: Boris Protopopov <boris.protopopov@actifio.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2217
Closes #3678
Closes #3681
  • Loading branch information
bprotopopov authored and behlendorf committed Mar 10, 2016
1 parent eb08567 commit a0bd735
Show file tree
Hide file tree
Showing 12 changed files with 482 additions and 214 deletions.
2 changes: 2 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

#ifndef _SYS_SPA_IMPL_H
Expand Down Expand Up @@ -253,6 +254,7 @@ struct spa {
uint64_t spa_errata; /* errata issues detected */
spa_stats_t spa_stats; /* assorted spa statistics */
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor managment */

/*
* spa_refcount & spa_config_lock must be the last elements
Expand Down
15 changes: 7 additions & 8 deletions include/sys/zvol.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

#ifndef _SYS_ZVOL_H
Expand All @@ -31,24 +32,22 @@
#define ZVOL_OBJ 1ULL
#define ZVOL_ZAP_OBJ 2ULL

#ifdef _KERNEL
extern void zvol_create_minors(spa_t *spa, const char *name, boolean_t async);
extern void zvol_remove_minors(spa_t *spa, const char *name, boolean_t async);
extern void zvol_rename_minors(spa_t *spa, const char *oldname,
const char *newname, boolean_t async);

#ifdef _KERNEL
extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
extern int zvol_check_volblocksize(const char *name, uint64_t volblocksize);
extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
extern boolean_t zvol_is_zvol(const char *);
extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
extern int zvol_create_minor(const char *name);
extern int zvol_create_minors(const char *name);
extern int zvol_remove_minor(const char *name);
extern void zvol_remove_minors(const char *name);
extern void zvol_rename_minors(const char *oldname, const char *newname);
extern int zvol_set_volsize(const char *, uint64_t);
extern int zvol_set_volblocksize(const char *, uint64_t);
extern int zvol_set_snapdev(const char *, uint64_t);
extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t);

extern int zvol_init(void);
extern void zvol_fini(void);

#endif /* _KERNEL */
#endif /* _SYS_ZVOL_H */
22 changes: 22 additions & 0 deletions lib/libzpool/kernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

#include <assert.h>
Expand Down Expand Up @@ -1354,3 +1355,24 @@ spl_fstrans_check(void)
{
return (0);
}

void
zvol_create_minors(spa_t *spa, const char *name, boolean_t async)
{
}

void
zvol_remove_minor(spa_t *spa, const char *name, boolean_t async)
{
}

void
zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
{
}

void
zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,
boolean_t async)
{
}
4 changes: 4 additions & 0 deletions module/zfs/dmu_objset.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

/* Portions Copyright 2010 Robert Milkowski */
Expand Down Expand Up @@ -868,6 +869,8 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
}

spa_history_log_internal_ds(ds, "create", tx, "");
zvol_create_minors(dp->dp_spa, doca->doca_name, B_TRUE);

dsl_dataset_rele(ds, FTAG);
dsl_dir_rele(pdd, FTAG);
}
Expand Down Expand Up @@ -961,6 +964,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_name(origin, namebuf);
spa_history_log_internal_ds(ds, "clone", tx,
"origin=%s (%llu)", namebuf, origin->ds_object);
zvol_create_minors(dp->dp_spa, doca->doca_clone, B_TRUE);
dsl_dataset_rele(ds, FTAG);
dsl_dataset_rele(origin, FTAG);
dsl_dir_rele(pdd, FTAG);
Expand Down
3 changes: 3 additions & 0 deletions module/zfs/dmu_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

#include <sys/dmu.h>
Expand Down Expand Up @@ -54,6 +55,7 @@
#include <sys/dsl_bookmark.h>
#include <sys/zfeature.h>
#include <sys/bqueue.h>
#include <sys/zvol.h>

/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;
Expand Down Expand Up @@ -2646,6 +2648,7 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
}
drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE);
/*
* Release the hold from dmu_recv_begin. This must be done before
* we return to open context, so that when we free the dataset's dnode,
Expand Down
36 changes: 6 additions & 30 deletions module/zfs/dsl_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 RackTop Systems.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

#include <sys/dmu_objset.h>
Expand Down Expand Up @@ -1424,6 +1425,7 @@ dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
dsl_props_set_sync_impl(ds->ds_prev,
ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
}
zvol_create_minors(dp->dp_spa, nvpair_name(pair), B_TRUE);
dsl_dataset_rele(ds, FTAG);
}
}
Expand Down Expand Up @@ -1498,16 +1500,6 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
fnvlist_free(suspended);
}

#ifdef _KERNEL
if (error == 0) {
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
char *snapname = nvpair_name(pair);
zvol_create_minors(snapname);
}
}
#endif

return (error);
}

Expand Down Expand Up @@ -1930,6 +1922,8 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
VERIFY0(zap_add(dp->dp_meta_objset,
dsl_dataset_phys(hds)->ds_snapnames_zapobj,
ds->ds_snapname, 8, 1, &ds->ds_object, tx));
zvol_rename_minors(dp->dp_spa, ddrsa->ddrsa_oldsnapname,
ddrsa->ddrsa_newsnapname, B_TRUE);

dsl_dataset_rele(ds, FTAG);
return (0);
Expand Down Expand Up @@ -1958,34 +1952,16 @@ int
dsl_dataset_rename_snapshot(const char *fsname,
const char *oldsnapname, const char *newsnapname, boolean_t recursive)
{
#ifdef _KERNEL
char *oldname, *newname;
#endif
int error;

dsl_dataset_rename_snapshot_arg_t ddrsa;

ddrsa.ddrsa_fsname = fsname;
ddrsa.ddrsa_oldsnapname = oldsnapname;
ddrsa.ddrsa_newsnapname = newsnapname;
ddrsa.ddrsa_recursive = recursive;

error = dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
dsl_dataset_rename_snapshot_sync, &ddrsa,
1, ZFS_SPACE_CHECK_RESERVED);

if (error)
return (SET_ERROR(error));

#ifdef _KERNEL
oldname = kmem_asprintf("%s@%s", fsname, oldsnapname);
newname = kmem_asprintf("%s@%s", fsname, newsnapname);
zvol_rename_minors(oldname, newname);
strfree(newname);
strfree(oldname);
#endif

return (0);
1, ZFS_SPACE_CHECK_RESERVED));
}

/*
Expand Down
8 changes: 5 additions & 3 deletions module/zfs/dsl_destroy.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2013 by Joyent, Inc. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

#include <sys/zfs_context.h>
Expand All @@ -40,6 +41,7 @@
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>
#include <sys/zvol.h>

typedef struct dmu_snapshots_destroy_arg {
nvlist_t *dsda_snaps;
Expand Down Expand Up @@ -243,9 +245,6 @@ dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
#ifdef ZFS_DEBUG
int err;
#endif
spa_feature_t f;
int after_branch_point = FALSE;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
Expand Down Expand Up @@ -441,6 +440,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
#ifdef ZFS_DEBUG
{
uint64_t val;
int err;

err = dsl_dataset_snap_lookup(ds_head,
ds->ds_snapname, &val);
Expand Down Expand Up @@ -490,6 +490,7 @@ dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));

dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
zvol_remove_minors(dp->dp_spa, nvpair_name(pair), B_TRUE);
dsl_dataset_rele(ds, FTAG);
}
}
Expand Down Expand Up @@ -889,6 +890,7 @@ dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)

VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
dsl_destroy_head_sync_impl(ds, tx);
zvol_remove_minors(dp->dp_spa, ddha->ddha_name, B_TRUE);
dsl_dataset_rele(ds, FTAG);
}

Expand Down
6 changes: 3 additions & 3 deletions module/zfs/dsl_dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

#include <sys/dmu.h>
Expand Down Expand Up @@ -1909,9 +1910,8 @@ dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
VERIFY0(zap_add(mos, dsl_dir_phys(newparent)->dd_child_dir_zapobj,
dd->dd_myname, 8, 1, &dd->dd_object, tx));

#ifdef _KERNEL
zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname);
#endif
zvol_rename_minors(dp->dp_spa, ddra->ddra_oldname,
ddra->ddra_newname, B_TRUE);

dsl_prop_notify_all(dd);

Expand Down
37 changes: 30 additions & 7 deletions module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

/*
Expand Down Expand Up @@ -1136,6 +1137,24 @@ spa_activate(spa_t *spa, int mode)
avl_create(&spa->spa_errlist_last,
spa_error_entry_compare, sizeof (spa_error_entry_t),
offsetof(spa_error_entry_t, se_avl));

/*
* This taskq is used to perform zvol-minor-related tasks
* asynchronously. This has several advantages, including easy
* resolution of various deadlocks (zfsonlinux bug #3681).
*
* The taskq must be single threaded to ensure tasks are always
* processed in the order in which they were dispatched.
*
* A taskq per pool allows one to keep the pools independent.
* This way if one pool is suspended, it will not impact another.
*
* The preferred location to dispatch a zvol minor task is a sync
* task. In this context, there is easy access to the spa_t and minimal
* error handling is required because the sync task must succeed.
*/
spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri,
1, INT_MAX, 0);
}

/*
Expand All @@ -1154,6 +1173,11 @@ spa_deactivate(spa_t *spa)

spa_evicting_os_wait(spa);

if (spa->spa_zvol_taskq) {
taskq_destroy(spa->spa_zvol_taskq);
spa->spa_zvol_taskq = NULL;
}

txg_list_destroy(&spa->spa_vdev_txg_list);

list_destroy(&spa->spa_config_dirty_list);
Expand Down Expand Up @@ -3088,10 +3112,8 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
mutex_exit(&spa_namespace_lock);
}

#ifdef _KERNEL
if (firstopen)
zvol_create_minors(spa->spa_name);
#endif
zvol_create_minors(spa, spa_name(spa), B_TRUE);

*spapp = spa;

Expand Down Expand Up @@ -4211,10 +4233,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)

mutex_exit(&spa_namespace_lock);
spa_history_log_version(spa, "import");

#ifdef _KERNEL
zvol_create_minors(pool);
#endif
zvol_create_minors(spa, pool, B_TRUE);

return (0);
}
Expand Down Expand Up @@ -4349,6 +4368,10 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
spa_open_ref(spa, FTAG);
mutex_exit(&spa_namespace_lock);
spa_async_suspend(spa);
if (spa->spa_zvol_taskq) {
zvol_remove_minors(spa, spa_name(spa), B_TRUE);
taskq_wait(spa->spa_zvol_taskq);
}
mutex_enter(&spa_namespace_lock);
spa_close(spa, FTAG);

Expand Down
Loading

0 comments on commit a0bd735

Please sign in to comment.