Skip to content

Commit dea377c

Browse files
ahrens authored and behlendorf committed
Illumos 4970-4974 - extreme rewind enhancements
4970 need controls on i/o issued by zpool import -XF
4971 zpool import -T should accept hex values
4972 zpool import -T implies extreme rewind, and thus a scrub
4973 spa_load_retry retries the same txg
4974 spa_load_verify() reads all data twice

Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>

References:
  https://www.illumos.org/issues/4970
  https://www.illumos.org/issues/4971
  https://www.illumos.org/issues/4972
  https://www.illumos.org/issues/4973
  https://www.illumos.org/issues/4974
  illumos/illumos-gate@e42d205

Notes:
  This set of patches adds a set of tunable parameters for the "extreme rewind" mode of pool import which allows control over the traversal performed during such an import.

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2598
1 parent 49ddb31 commit dea377c

File tree

3 files changed

+114
-15
lines changed

3 files changed

+114
-15
lines changed

cmd/zpool/zpool_main.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2424
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25-
* Copyright (c) 2013 by Delphix. All rights reserved.
25+
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
2626
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
2727
* Copyright (c) 2012 by Cyril Plisko. All rights reserved.
2828
*/
@@ -2082,7 +2082,7 @@ zpool_do_import(int argc, char **argv)
20822082

20832083
case 'T':
20842084
errno = 0;
2085-
txg = strtoull(optarg, &endptr, 10);
2085+
txg = strtoull(optarg, &endptr, 0);
20862086
if (errno != 0 || *endptr != '\0') {
20872087
(void) fprintf(stderr,
20882088
gettext("invalid txg value\n"));

man/man5/zfs-module-parameters.5

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,52 @@ they operate close to quota or capacity limits.
230230
Default value: 24
231231
.RE
232232

233+
.sp
234+
.ne 2
235+
.na
236+
\fBspa_load_verify_data\fR (int)
237+
.ad
238+
.RS 12n
239+
Whether to traverse data blocks during an "extreme rewind" (\fB-X\fR)
240+
import. Use 0 to disable and 1 to enable.
241+
242+
An extreme rewind import normally performs a full traversal of all
243+
blocks in the pool for verification. If this parameter is set to 0,
244+
the traversal skips non-metadata blocks. It can be toggled once the
245+
import has started to stop or start the traversal of non-metadata blocks.
246+
.sp
247+
Default value: 1
248+
.RE
249+
250+
.sp
251+
.ne 2
252+
.na
253+
\fBspa_load_verify_metadata\fR (int)
254+
.ad
255+
.RS 12n
256+
Whether to traverse blocks during an "extreme rewind" (\fB-X\fR)
257+
pool import. Use 0 to disable and 1 to enable.
258+
259+
An extreme rewind import normally performs a full traversal of all
260+
blocks in the pool for verification. If this parameter is set to 0,
261+
the traversal is not performed. It can be toggled once the import has
262+
started to stop or start the traversal.
263+
.sp
264+
Default value: 1
265+
.RE
266+
267+
.sp
268+
.ne 2
269+
.na
270+
\fBspa_load_verify_maxinflight\fR (int)
271+
.ad
272+
.RS 12n
273+
Maximum concurrent I/Os during the traversal performed during an "extreme
274+
rewind" (\fB-X\fR) pool import.
275+
.sp
276+
Default value: 10000
277+
.RE
278+
233279
.sp
234280
.ne 2
235281
.na

module/zfs/spa.c

Lines changed: 66 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1855,6 +1855,7 @@ spa_load_verify_done(zio_t *zio)
18551855
spa_load_error_t *sle = zio->io_private;
18561856
dmu_object_type_t type = BP_GET_TYPE(bp);
18571857
int error = zio->io_error;
1858+
spa_t *spa = zio->io_spa;
18581859

18591860
if (error) {
18601861
if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
@@ -1864,23 +1865,56 @@ spa_load_verify_done(zio_t *zio)
18641865
atomic_add_64(&sle->sle_data_count, 1);
18651866
}
18661867
zio_data_buf_free(zio->io_data, zio->io_size);
1868+
1869+
mutex_enter(&spa->spa_scrub_lock);
1870+
spa->spa_scrub_inflight--;
1871+
cv_broadcast(&spa->spa_scrub_io_cv);
1872+
mutex_exit(&spa->spa_scrub_lock);
18671873
}
18681874

1875+
/*
1876+
* Maximum number of concurrent scrub i/os to create while verifying
1877+
* a pool while importing it.
1878+
*/
1879+
int spa_load_verify_maxinflight = 10000;
1880+
int spa_load_verify_metadata = B_TRUE;
1881+
int spa_load_verify_data = B_TRUE;
1882+
18691883
/*ARGSUSED*/
18701884
static int
18711885
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
18721886
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
18731887
{
1874-
if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
1875-
zio_t *rio = arg;
1876-
size_t size = BP_GET_PSIZE(bp);
1877-
void *data = zio_data_buf_alloc(size);
1888+
zio_t *rio;
1889+
size_t size;
1890+
void *data;
18781891

1879-
zio_nowait(zio_read(rio, spa, bp, data, size,
1880-
spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
1881-
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
1882-
ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
1883-
}
1892+
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
1893+
return (0);
1894+
/*
1895+
* Note: normally this routine will not be called if
1896+
* spa_load_verify_metadata is not set. However, it may be useful
1897+
* to manually set the flag after the traversal has begun.
1898+
*/
1899+
if (!spa_load_verify_metadata)
1900+
return (0);
1901+
if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data)
1902+
return (0);
1903+
1904+
rio = arg;
1905+
size = BP_GET_PSIZE(bp);
1906+
data = zio_data_buf_alloc(size);
1907+
1908+
mutex_enter(&spa->spa_scrub_lock);
1909+
while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight)
1910+
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
1911+
spa->spa_scrub_inflight++;
1912+
mutex_exit(&spa->spa_scrub_lock);
1913+
1914+
zio_nowait(zio_read(rio, spa, bp, data, size,
1915+
spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
1916+
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
1917+
ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
18841918
return (0);
18851919
}
18861920

@@ -1891,7 +1925,7 @@ spa_load_verify(spa_t *spa)
18911925
spa_load_error_t sle = { 0 };
18921926
zpool_rewind_policy_t policy;
18931927
boolean_t verify_ok = B_FALSE;
1894-
int error;
1928+
int error = 0;
18951929

18961930
zpool_get_rewind_policy(spa->spa_config, &policy);
18971931

@@ -1901,8 +1935,11 @@ spa_load_verify(spa_t *spa)
19011935
rio = zio_root(spa, NULL, &sle,
19021936
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
19031937

1904-
error = traverse_pool(spa, spa->spa_verify_min_txg,
1905-
TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);
1938+
if (spa_load_verify_metadata) {
1939+
error = traverse_pool(spa, spa->spa_verify_min_txg,
1940+
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
1941+
spa_load_verify_cb, rio);
1942+
}
19061943

19071944
(void) zio_wait(rio);
19081945

@@ -2781,7 +2818,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
27812818
spa_unload(spa);
27822819
spa_deactivate(spa);
27832820

2784-
spa->spa_load_max_txg--;
2821+
spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1;
27852822

27862823
spa_activate(spa, mode);
27872824
spa_async_suspend(spa);
@@ -2811,6 +2848,8 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
28112848
spa_set_log_state(spa, SPA_LOG_CLEAR);
28122849
} else {
28132850
spa->spa_load_max_txg = max_request;
2851+
if (max_request != UINT64_MAX)
2852+
spa->spa_extreme_rewind = B_TRUE;
28142853
}
28152854

28162855
load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING,
@@ -6603,3 +6642,17 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
66036642
/* asynchronous event notification */
66046643
EXPORT_SYMBOL(spa_event_notify);
66056644
#endif
6645+
6646+
#if defined(_KERNEL) && defined(HAVE_SPL)
6647+
module_param(spa_load_verify_maxinflight, int, 0644);
6648+
MODULE_PARM_DESC(spa_load_verify_maxinflight,
6649+
"Max concurrent traversal I/Os while verifying pool during import -X");
6650+
6651+
module_param(spa_load_verify_metadata, int, 0644);
6652+
MODULE_PARM_DESC(spa_load_verify_metadata,
6653+
"Set to traverse metadata on pool import");
6654+
6655+
module_param(spa_load_verify_data, int, 0644);
6656+
MODULE_PARM_DESC(spa_load_verify_data,
6657+
"Set to traverse data on pool import");
6658+
#endif

0 commit comments

Comments
 (0)