Skip to content

Commit fbeddd6

Browse files
ahrens authored and behlendorf committed
Illumos 4390 - I/O errors can corrupt space map when deleting fs/vol
4390 i/o errors when deleting filesystem/zvol can lead to space map corruption

Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
https://www.illumos.org/issues/4390
illumos/illumos-gate@7fd05ac

Porting notes:

Previous stack-reduction efforts in traverse_visitb() caused a fair number of unmergeable pieces of code. This patch should reduce its stack footprint a bit more.

The new local bptree_entry_phys_t in bptree_add() is dynamically allocated using kmem_zalloc() for the purpose of stack reduction.

The new global zfs_free_leak_on_eio has been defined as an integer rather than a boolean_t as was the case with the related zfs_recover global. Also, zfs_free_leak_on_eio's definition has been inserted into zfs_debug.c for consistency with the existing definition of zfs_recover. Illumos placed it in spa_misc.c.

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2545
1 parent 9b67f60 commit fbeddd6

File tree

17 files changed

+339
-157
lines changed

17 files changed

+339
-157
lines changed

include/sys/bptree.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
* CDDL HEADER END
2020
*/
2121
/*
22-
* Copyright (c) 2012 by Delphix. All rights reserved.
22+
* Copyright (c) 2013 by Delphix. All rights reserved.
2323
*/
2424

2525
#ifndef _SYS_BPTREE_H
@@ -50,6 +50,7 @@ typedef int bptree_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
5050

5151
uint64_t bptree_alloc(objset_t *os, dmu_tx_t *tx);
5252
int bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx);
53+
boolean_t bptree_is_empty(objset_t *os, uint64_t obj);
5354

5455
void bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
5556
uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx);

include/sys/dmu.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,6 @@ void zfs_znode_byteswap(void *buf, size_t size);
250250

251251
#define DMU_USERUSED_OBJECT (-1ULL)
252252
#define DMU_GROUPUSED_OBJECT (-2ULL)
253-
#define DMU_DEADLIST_OBJECT (-3ULL)
254253

255254
/*
256255
* artificial blkids for bonus buffer and spill blocks

include/sys/dsl_dir.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value,
144144
#define ORIGIN_DIR_NAME "$ORIGIN"
145145
#define XLATION_DIR_NAME "$XLATION"
146146
#define FREE_DIR_NAME "$FREE"
147+
#define LEAK_DIR_NAME "$LEAK"
147148

148149
#ifdef ZFS_DEBUG
149150
#define dprintf_dd(dd, fmt, ...) do { \

include/sys/dsl_pool.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ typedef struct dsl_pool {
8787
struct dsl_dir *dp_root_dir;
8888
struct dsl_dir *dp_mos_dir;
8989
struct dsl_dir *dp_free_dir;
90+
struct dsl_dir *dp_leak_dir;
9091
struct dsl_dataset *dp_origin_snap;
9192
uint64_t dp_root_dir_obj;
9293
struct taskq *dp_iput_taskq;

include/sys/dsl_scan.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ typedef struct dsl_scan {
116116
/* for freeing blocks */
117117
boolean_t scn_is_bptree;
118118
boolean_t scn_async_destroying;
119+
boolean_t scn_async_stalled;
119120

120121
/* for debugging / information */
121122
uint64_t scn_visited_this_txg;

include/sys/fs/zfs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ typedef enum {
193193
ZPOOL_PROP_COMMENT,
194194
ZPOOL_PROP_EXPANDSZ,
195195
ZPOOL_PROP_FREEING,
196+
ZPOOL_PROP_LEAKED,
196197
ZPOOL_NUM_PROPS
197198
} zpool_prop_t;
198199

include/sys/zfs_debug.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ extern "C" {
4848

4949
extern int zfs_flags;
5050
extern int zfs_recover;
51+
extern int zfs_free_leak_on_eio;
5152

5253
#define ZFS_DEBUG_DPRINTF (1<<0)
5354
#define ZFS_DEBUG_DBUF_VERIFY (1<<1)

lib/libzfs/libzfs_pool.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ zpool_get_prop_literal(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
316316
case ZPOOL_PROP_ALLOCATED:
317317
case ZPOOL_PROP_FREE:
318318
case ZPOOL_PROP_FREEING:
319+
case ZPOOL_PROP_LEAKED:
319320
case ZPOOL_PROP_EXPANDSZ:
320321
case ZPOOL_PROP_ASHIFT:
321322
if (literal)

man/man5/zfs-module-parameters.5

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,43 @@ Set additional debugging flags
696696
Default value: \fB1\fR.
697697
.RE
698698

699+
.sp
700+
.ne 2
701+
.na
702+
\fBzfs_free_leak_on_eio\fR (int)
703+
.ad
704+
.RS 12n
705+
If destroy encounters an EIO while reading metadata (e.g. indirect
706+
blocks), space referenced by the missing metadata can not be freed.
707+
Normally this causes the background destroy to become "stalled", as
708+
it is unable to make forward progress. While in this stalled state,
709+
all remaining space to free from the error-encountering filesystem is
710+
"temporarily leaked". Set this flag to cause it to ignore the EIO,
711+
permanently leak the space from indirect blocks that can not be read,
712+
and continue to free everything else that it can.
713+
714+
The default, "stalling" behavior is useful if the storage partially
715+
fails (i.e. some but not all i/os fail), and then later recovers. In
716+
this case, we will be able to continue pool operations while it is
717+
partially failed, and when it recovers, we can continue to free the
718+
space, with no leaks. However, note that this case is actually
719+
fairly rare.
720+
721+
Typically pools either (a) fail completely (but perhaps temporarily,
722+
e.g. a top-level vdev going offline), or (b) have localized,
723+
permanent errors (e.g. disk returns the wrong data due to bit flip or
724+
firmware bug). In case (a), this setting does not matter because the
725+
pool will be suspended and the sync thread will not be able to make
726+
forward progress regardless. In case (b), because the error is
727+
permanent, the best we can do is leak the minimum amount of space,
728+
which is what setting this flag will do. Therefore, it is reasonable
729+
for this flag to normally be set, but we chose the more conservative
730+
approach of not setting it, so that there is no possibility of
731+
leaking space in the "partial temporary" failure case.
732+
.sp
733+
Default value: \fB0\fR.
734+
.RE
735+
699736
.sp
700737
.ne 2
701738
.na

module/zcommon/zpool_prop.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ zpool_prop_init(void)
8181
ZFS_TYPE_POOL, "<size>", "FREE");
8282
zprop_register_number(ZPOOL_PROP_FREEING, "freeing", 0, PROP_READONLY,
8383
ZFS_TYPE_POOL, "<size>", "FREEING");
84+
zprop_register_number(ZPOOL_PROP_LEAKED, "leaked", 0, PROP_READONLY,
85+
ZFS_TYPE_POOL, "<size>", "LEAKED");
8486
zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0,
8587
PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC");
8688
zprop_register_number(ZPOOL_PROP_EXPANDSZ, "expandsize", 0,

0 commit comments

Comments
 (0)