Verify ZED detects physically removed L2ARC device
This commit adds a new test case to the ZFS Test Suite to verify ZED
can detect when a cache device is physically removed from a running
system.

Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
Requires-builders: test
loli10K committed Sep 23, 2018
Parent: dda5500 · Commit: 11324b6
Showing 9 changed files with 201 additions and 17 deletions.
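
For reference, the new test case can be exercised on its own through the ZFS Test Suite wrapper before the full pull-7926.run loop is queued on the builders. A minimal sketch, assuming the in-tree scripts/zfs-tests.sh harness and that its -t option takes a single test path relative to the repository root:

  # Run only the new ZED/L2ARC removal test (requires root and the
  # scsi_debug module, see the test script below)
  sudo ./scripts/zfs-tests.sh -v \
      -t tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh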
10 changes: 5 additions & 5 deletions TEST
@@ -5,21 +5,21 @@
#TEST_PREPARE_SHARES="yes"

### ztest
#TEST_ZTEST_SKIP="yes"
TEST_ZTEST_SKIP="yes"
#TEST_ZTEST_TIMEOUT=1800
#TEST_ZTEST_DIR="/var/tmp/"
#TEST_ZTEST_OPTIONS="-V"
#TEST_ZTEST_CORE_DIR="/mnt/zloop"

### zimport
#TEST_ZIMPORT_SKIP="yes"
TEST_ZIMPORT_SKIP="yes"
#TEST_ZIMPORT_DIR="/var/tmp/zimport"
#TEST_ZIMPORT_VERSIONS="master installed"
#TEST_ZIMPORT_POOLS="zol-0.6.1 zol-0.6.2 master installed"
#TEST_ZIMPORT_OPTIONS="-c"

### xfstests
#TEST_XFSTESTS_SKIP="yes"
TEST_XFSTESTS_SKIP="yes"
#TEST_XFSTESTS_URL="https://github.com/behlendorf/xfstests/archive/"
#TEST_XFSTESTS_VER="zfs.tar.gz"
#TEST_XFSTESTS_POOL="tank"
@@ -34,11 +34,11 @@
#TEST_ZFSTESTS_DISKSIZE="8G"
#TEST_ZFSTESTS_ITERS="1"
#TEST_ZFSTESTS_OPTIONS="-vx"
#TEST_ZFSTESTS_RUNFILE="linux.run"
TEST_ZFSTESTS_RUNFILE="pull-7926.run"
#TEST_ZFSTESTS_TAGS="functional"

### zfsstress
#TEST_ZFSSTRESS_SKIP="yes"
TEST_ZFSSTRESS_SKIP="yes"
#TEST_ZFSSTRESS_URL="https://github.com/nedbass/zfsstress/archive/"
#TEST_ZFSSTRESS_VER="master.tar.gz"
#TEST_ZFSSTRESS_RUNTIME=300
62 changes: 56 additions & 6 deletions cmd/zed/agents/zfs_agents.c
@@ -57,9 +57,11 @@ typedef struct guid_search {
uint64_t gs_pool_guid;
uint64_t gs_vdev_guid;
char *gs_devid;
boolean_t is_spare;
boolean_t is_l2arc;
} guid_search_t;

static void
static boolean_t
zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
{
guid_search_t *gsp = arg;
@@ -73,8 +75,27 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
&child, &children) == 0) {
for (c = 0; c < children; c++)
zfs_agent_iter_vdev(zhp, child[c], gsp);
return;
if (zfs_agent_iter_vdev(zhp, child[c], gsp))
return (B_TRUE);
}
/*
* Iterate over any spares and cache devices
*/
if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
&child, &children) == 0) {
for (c = 0; c < children; c++)
if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
gsp->is_spare = B_TRUE;
return (B_TRUE);
}
}
if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0) {
for (c = 0; c < children; c++)
if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
gsp->is_l2arc = B_TRUE;
return (B_TRUE);
}
}
/*
* On a devid match, grab the vdev guid
@@ -84,7 +105,10 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
(strcmp(gsp->gs_devid, path) == 0)) {
(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
&gsp->gs_vdev_guid);
return (B_TRUE);
}

return (B_FALSE);
}

static int
@@ -93,13 +117,16 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
guid_search_t *gsp = arg;
nvlist_t *config, *nvl;

fprintf(stderr, "zfs_agent_iter_pool: search %lu\n", gsp->gs_vdev_guid);
/*
* For each vdev in this pool, look for a match by devid
*/
if ((config = zpool_get_config(zhp, NULL)) != NULL) {
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvl) == 0) {
zfs_agent_iter_vdev(zhp, nvl, gsp);
fprintf(stderr, "zfs_agent_iter_pool: vdev_tree\n");
nvlist_print(stderr, nvl);
(void) zfs_agent_iter_vdev(zhp, nvl, gsp);
}
}
/*
@@ -108,6 +135,9 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
if (gsp->gs_vdev_guid) {
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
&gsp->gs_pool_guid);
fprintf(stderr, "zfs_agent_iter_pool: match found for %lu\n", gsp->gs_vdev_guid);
} else {
fprintf(stderr, "zfs_agent_iter_pool: match not found for %lu\n", gsp->gs_vdev_guid);
}

zpool_close(zhp);
@@ -118,6 +148,7 @@ void
zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
{
agent_event_t *event;
boolean_t is_spare = B_FALSE, is_l2arc = B_FALSE;

if (subclass == NULL)
subclass = "";
@@ -157,32 +188,51 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

/*
* For multipath, ZFS_EV_VDEV_GUID is missing so find it.
* For multipath, spares and L2ARC devices ZFS_EV_VDEV_GUID is
* missing so find it.
*/
if (vdev_guid == 0) {
guid_search_t search = { 0 };
int ret = 0;

(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
&search.gs_devid);
fprintf(stderr, "zfs_agent_post_event: search by devid=%s\n", search.gs_devid);

(void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool,
ret = zpool_iter(g_zfs_hdl, zfs_agent_iter_pool,
&search);
fprintf(stderr, "zpool_iter: %d\n", ret);
pool_guid = search.gs_pool_guid;
vdev_guid = search.gs_vdev_guid;
is_spare = search.is_spare;
is_l2arc = search.is_l2arc;
}

(void) nvlist_add_uint64(payload,
FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
(void) nvlist_add_uint64(payload,
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
if (is_spare)
(void) nvlist_add_string(payload,
FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
if (is_l2arc)
(void) nvlist_add_string(payload,
FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
VDEV_TYPE_L2CACHE);

(void) gettimeofday(&tv, NULL);
tod[0] = tv.tv_sec;
tod[1] = tv.tv_usec;
(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);

fprintf(stderr, "--- being raw udev ---\n");
nvlist_print(stderr, nvl);
fprintf(stderr, "--- end raw udev ---\n");
zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
EC_DEV_REMOVE, class);
fprintf(stderr, "--- being payload ---\n");
nvlist_print(stderr, payload);
fprintf(stderr, "--- end payload ---\n");
}

(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
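
With the spare/L2ARC lookup above, the agent now posts the removal event with an explicit vdev type in its payload, and the zed_log_msg() call records the class mapping. A quick way to confirm the agent saw the removal while reproducing it by hand is to check the ZED log, a sketch assuming ZED is logging to the file the test suite exposes as $ZED_LOG:

  # Confirm the agent translated the udev remove event for the pulled device
  grep "agent post event: mapping" $ZED_LOG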
37 changes: 37 additions & 0 deletions cmd/zed/agents/zfs_retire.c
@@ -126,6 +126,15 @@ find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid)
return (ret);
}

if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) != 0)
return (NULL);

for (c = 0; c < children; c++) {
if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL)
return (ret);
}

return (NULL);
}

@@ -308,6 +317,8 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
* check for an available spare and continue.
*/
if (strcmp(class, "resource.fs.zfs.removed") == 0) {
char *type;

if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
&pool_guid) != 0 ||
nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
@@ -318,6 +329,32 @@
&vdev)) == NULL)
return;

/* Can't replace a spare with another one: offline the device */
if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
&type) == 0 && strcmp(type, VDEV_TYPE_SPARE) == 0) {
char *dev_name;

dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);
fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", dev_name);
zpool_vdev_offline(zhp, dev_name, B_TRUE);
free(dev_name);
zpool_close(zhp);
return;
}

/* Can't replace a l2arc with a spare: offline the device */
if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
&type) == 0 && strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
char *dev_name;

dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);
fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", dev_name);
zpool_vdev_offline(zhp, dev_name, B_TRUE);
free(dev_name);
zpool_close(zhp);
return;
}

if (fmd_prop_get_int32(hdl, "spare_on_remove"))
replace_with_spare(hdl, zhp, vdev);
zpool_close(zhp);
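
Once the retire agent has handled the event for a spare or cache device, the expected end state is exactly what the new test asserts: the vdev is offlined rather than replaced with a hot spare. A quick manual check:

  # The pulled cache device should now be reported as OFFLINE
  zpool status $TESTPOOL
  zpool status -g $TESTPOOL    # same view, keyed by vdev GUID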
2 changes: 1 addition & 1 deletion lib/libzfs/libzfs_config.c
@@ -399,7 +399,7 @@ zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data)
* namespace AVL tree.
*/
if (!hdl->libzfs_pool_iter && namespace_reload(hdl) != 0)
return (-1);
return (-2);

hdl->libzfs_pool_iter++;
for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
6 changes: 4 additions & 2 deletions module/zfs/vdev_disk.c
@@ -803,8 +803,10 @@ vdev_disk_io_done(zio_t *zio)
if (zio->io_error == EIO) {
vdev_t *v = zio->io_vd;
vdev_disk_t *vd = v->vdev_tsd;

if (check_disk_change(vd->vd_bdev)) {
int ret = check_disk_change(vd->vd_bdev);
zfs_dbgmsg("check_disk_change(%s) = %d",
vd->vd_bdev->bd_disk->disk_name, ret);
if (ret) {
vdev_bdev_invalidate(vd->vd_bdev);
v->vdev_remove_wanted = B_TRUE;
spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
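
The zfs_dbgmsg() call added here goes to the ZFS internal debug log rather than dmesg. A sketch of how to read it on Linux, assuming the usual SPL kstat path and that debug logging is enabled via the zfs_dbgmsg_enable module parameter:

  # Enable and read the internal debug log to see the check_disk_change results
  echo 1 > /sys/module/zfs/parameters/zfs_dbgmsg_enable
  grep check_disk_change /proc/spl/kstat/zfs/dbgmsg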
7 changes: 4 additions & 3 deletions tests/runfiles/linux.run
@@ -536,9 +536,10 @@ tests = ['exec_001_pos', 'exec_002_neg']
tags = ['functional', 'exec']

[tests/functional/fault]
tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple',
'scrub_after_resilver', 'decrypt_fault', 'decompress_fault']
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos',
'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_ashift',
'auto_spare_multiple', 'scrub_after_resilver', 'decrypt_fault',
'decompress_fault']
tags = ['functional', 'fault']

[tests/functional/features/async_destroy]
16 changes: 16 additions & 0 deletions tests/runfiles/pull-7926.run
@@ -0,0 +1,16 @@
[DEFAULT]
pre = setup
quiet = False
pre_user = root
user = root
timeout = 600
post_user = root
post = cleanup
outputdir = /var/tmp/test_results

[tests/functional/fault]
tests = ['auto_offline_001_pos', 'auto_offline_001_pos', 'auto_offline_001_pos',
'auto_offline_001_pos', 'auto_offline_001_pos', 'auto_offline_001_pos',
'auto_offline_001_pos', 'auto_offline_001_pos', 'auto_offline_001_pos']
tags = ['functional']
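
This runfile simply repeats the new test nine times for the CI builders. To drive the same loop locally it can be handed to the harness directly, a sketch assuming zfs-tests.sh resolves -r arguments against tests/runfiles/:

  # Run the custom runfile by hand
  sudo ./scripts/zfs-tests.sh -v -r pull-7926.run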

1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/fault/Makefile.am
@@ -2,6 +2,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/fault
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
auto_offline_001_pos.ksh \
auto_online_001_pos.ksh \
auto_replace_001_pos.ksh \
auto_spare_001_pos.ksh \
77 changes: 77 additions & 0 deletions tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh
@@ -0,0 +1,77 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2017 by Intel Corporation. All rights reserved.
# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg

#
# DESCRIPTION:
# Testing Fault Management Agent ZED Logic - Physically removed L2ARC device
# is offlined
#
# STRATEGY:
# 1. Create a pool with a L2ARC device
# 2. Simulate physical removal of L2ARC device
# 3. Verify the device is offlined
#
verify_runnable "both"

if is_linux; then
load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
else
log_unsupported "scsi debug module unsupported"
fi

function cleanup
{
destroy_pool $TESTPOOL
rm -f $FILE_DEVICE
unload_scsi_debug
}

log_assert "ZED detects physically removed L2ARC device"

log_onexit cleanup

FILE_DEVICE="$TEST_BASE_DIR/file-vdev"
L2ARC_DEVICE=$(get_debug_device)

# 1. Create a pool with a L2ARC device
truncate -s $SPA_MINDEVSIZE $FILE_DEVICE
log_must zpool create $TESTPOOL $FILE_DEVICE cache $L2ARC_DEVICE

# 2. Simulate physical removal of L2ARC device
remove_disk $L2ARC_DEVICE

# 3. Verify the device is offlined
#log_must wait_vdev_state $TESTPOOL $L2ARC_DEVICE "OFFLINE"
wait_vdev_state $TESTPOOL $L2ARC_DEVICE "OFFLINE"
ret=$?
if [[ $ret != 0 ]]; then
zpool status
zpool status -g
zpool status -LP
cat $ZED_LOG
truncate -s 0 $ZED_LOG
log_fail ""
fi

log_pass "ZED detects physically removed L2ARC device"
