Permalink
Browse files

Prefer non-zero split versions for reconstruction

As an optimization, when performing split block reconstruction prefer
non-zero versions of the splits.  The reasoning behind this is
that zeroed splits are unlikely to be correct since most blocks
are expected to either be compressed or encrypted.  A zeroed
split version is most likely free space which was never written.
This case was observed when running ztest which uses sparse files.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
  • Loading branch information...
behlendorf committed Nov 8, 2018
1 parent d8244d3 commit c8332cd00f8dd0f98943ce90f37de755639bb4a3
Showing with 64 additions and 17 deletions.
  1. +7 −0 include/sys/abd.h
  2. +22 −0 module/zfs/abd.c
  3. +34 −4 module/zfs/vdev_indirect.c
  4. +1 −13 module/zfs/zio_compress.c
@@ -114,6 +114,7 @@ void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
int abd_cmp(abd_t *, abd_t *);
int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
void abd_zero_off(abd_t *, size_t, size_t);
boolean_t abd_is_zeroed_off(abd_t *, size_t, size_t);
#if defined(_KERNEL)
unsigned int abd_scatter_bio_map_off(struct bio *, abd_t *, unsigned int,
@@ -164,6 +165,12 @@ abd_zero(abd_t *abd, size_t size)
abd_zero_off(abd, 0, size);
}
/*
 * Convenience wrapper: check whether the entire abd contains only zeros.
 */
static inline boolean_t
abd_is_zeroed(abd_t *abd)
{
	size_t len = abd->abd_size;

	return (abd_is_zeroed_off(abd, 0, len));
}
/*
* Module lifecycle
*/
@@ -1215,6 +1215,28 @@ abd_zero_off(abd_t *abd, size_t off, size_t size)
(void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL);
}
/*ARGSUSED*/
/*
 * abd_iterate_func() callback: return non-zero as soon as a non-zero
 * 64-bit word is found, zero if the buffer holds only zeros.  The buffer
 * is scanned a word at a time; assumes size is a multiple of 8 bytes
 * (true for ABD chunks) — TODO confirm for any new callers.
 */
static int
abd_zeroed_cb(void *buf, size_t size, void *private)
{
	const uint64_t *word = buf;
	const uint64_t *end = (const uint64_t *)((char *)buf + size);

	while (word < end) {
		if (*word != 0)
			return (1);
		word++;
	}

	return (0);
}
/*
 * Return B_TRUE when the given range of the abd contains only zeros,
 * B_FALSE otherwise.  Iteration stops at the first non-zero word.
 */
boolean_t
abd_is_zeroed_off(abd_t *abd, size_t off, size_t size)
{
	int ret = abd_iterate_func(abd, off, size, abd_zeroed_cb, NULL);

	return (ret == 0);
}
/*
* Iterate over two ABDs and call func incrementally on the two ABDs' data in
* equal-sized chunks (passed to func as raw buffers). func could be called many
@@ -1503,6 +1503,21 @@ vdev_indirect_splits_checksum_validate(indirect_vsd_t *iv, zio_t *zio)
return (zio_checksum_error(zio, &zbc));
}
/*
 * The head of each split's is_unique_child list is our best guess at the
 * correct version (non-zero versions are inserted at the head), so try
 * the combination of all first entries before any other enumeration.
 */
static int
vdev_indirect_splits_enumerate_first(indirect_vsd_t *iv, zio_t *zio)
{
	indirect_split_t *is;

	for (is = list_head(&iv->iv_splits); is != NULL;
	    is = list_next(&iv->iv_splits, is))
		is->is_good_child = list_head(&is->is_unique_child);

	return (vdev_indirect_splits_checksum_validate(iv, zio));
}
/*
* There are relatively few possible combinations making it feasible to
* deterministically check them all. We do this by setting the good_child
@@ -1761,7 +1776,18 @@ vdev_indirect_reconstruct_io_done(zio_t *zio)
}
is->is_unique_children++;
list_insert_tail(&is->is_unique_child, ic_i);
/*
* Split versions of all zeros are less likely to be
* correct since most blocks are expected to either be
* compressed or encrypted. These versions are most
* likely to be free space which was never written to.
* As an optimization check these after non-zero data.
*/
if (abd_is_zeroed(ic_i->ic_data))
list_insert_tail(&is->is_unique_child, ic_i);
else
list_insert_head(&is->is_unique_child, ic_i);
}
/* Reconstruction is impossible, no valid children */
@@ -1777,10 +1803,14 @@ vdev_indirect_reconstruct_io_done(zio_t *zio)
iv->iv_unique_combinations *= is->is_unique_children;
}
if (iv->iv_unique_combinations <= iv->iv_attempts_max)
if (iv->iv_unique_combinations <= iv->iv_attempts_max) {
error = vdev_indirect_splits_enumerate_all(iv, zio);
else
error = vdev_indirect_splits_enumerate_randomly(iv, zio);
} else {
error = vdev_indirect_splits_enumerate_first(iv, zio);
if (error != 0)
error = vdev_indirect_splits_enumerate_randomly(iv,
zio);
}
if (error != 0) {
/* All attempted combinations failed. */
@@ -89,18 +89,6 @@ zio_compress_select(spa_t *spa, enum zio_compress child,
return (result);
}
/*ARGSUSED*/
/*
 * abd_iterate_func() callback used to detect all-zero buffers: returns
 * non-zero on the first non-zero 64-bit word, zero when every word in
 * the buffer is zero.  Assumes len is a multiple of 8 bytes — TODO
 * confirm for any new callers.
 */
static int
zio_compress_zeroed_cb(void *data, size_t len, void *private)
{
	const char *limit = (const char *)data + len;
	const uint64_t *p = data;

	for (; (const char *)p < limit; p++) {
		if (*p != 0)
			return (1);
	}

	return (0);
}
size_t
zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len)
{
@@ -114,7 +102,7 @@ zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len)
* If the data is all zeroes, we don't even need to allocate
* a block for it. We indicate this by returning zero size.
*/
if (abd_iterate_func(src, 0, s_len, zio_compress_zeroed_cb, NULL) == 0)
if (abd_is_zeroed_off(src, 0, s_len))
return (0);
if (c == ZIO_COMPRESS_EMPTY)

0 comments on commit c8332cd

Please sign in to comment.