Skip to content

Commit 98b2541

Browse files
grwilson authored and behlendorf committed
Illumos #5244 - zio pipeline callers should explicitly invoke next stage
5244 zio pipeline callers should explicitly invoke next stage

Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Alex Reece <alex.reece@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Richard Elling <richard.elling@gmail.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Steven Hartland <killing@multiplay.co.uk>
Approved by: Gordon Ross <gwr@nexenta.com>

References:
  https://www.illumos.org/issues/5244
  illumos/illumos-gate@738f37b

Porting Notes:
1. The unported "2932 support crash dumps to raidz, etc. pools" caused a merge conflict due to a copyright difference in module/zfs/vdev_raidz.c.
2. The unported "4128 disks in zpools never go away when pulled" and additional Linux-specific changes caused merge conflicts in module/zfs/vdev_disk.c.

Ported-by: Richard Yao <richard.yao@clusterhq.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2828
1 parent 8dd86a1 commit 98b2541

File tree

9 files changed

+55
-34
lines changed

9 files changed

+55
-34
lines changed

include/sys/vdev_impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
6060
uint64_t *ashift);
6161
typedef void vdev_close_func_t(vdev_t *vd);
6262
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
63-
typedef int vdev_io_start_func_t(zio_t *zio);
63+
typedef void vdev_io_start_func_t(zio_t *zio);
6464
typedef void vdev_io_done_func_t(zio_t *zio);
6565
typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
6666
typedef void vdev_hold_func_t(vdev_t *vd);

include/sys/zio.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,9 +153,6 @@ typedef enum zio_priority {
153153
ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */
154154
} zio_priority_t;
155155

156-
#define ZIO_PIPELINE_CONTINUE 0x100
157-
#define ZIO_PIPELINE_STOP 0x101
158-
159156
enum zio_flag {
160157
/*
161158
* Flags inherited by gang, ddt, and vdev children,

lib/libzpool/taskq.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
/*
2626
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
2727
* Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
28+
* Copyright (c) 2014 by Delphix. All rights reserved.
2829
*/
2930

3031
#include <sys/zfs_context.h>
@@ -33,8 +34,10 @@ int taskq_now;
3334
taskq_t *system_taskq;
3435

3536
#define TASKQ_ACTIVE 0x00010000
37+
#define TASKQ_NAMELEN 31
3638

3739
struct taskq {
40+
char tq_name[TASKQ_NAMELEN + 1];
3841
kmutex_t tq_lock;
3942
krwlock_t tq_threadlock;
4043
kcondvar_t tq_dispatch_cv;
@@ -280,6 +283,7 @@ taskq_create(const char *name, int nthreads, pri_t pri,
280283
cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
281284
cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL);
282285
cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL);
286+
(void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1);
283287
tq->tq_flags = flags | TASKQ_ACTIVE;
284288
tq->tq_active = nthreads;
285289
tq->tq_nthreads = nthreads;

module/zfs/vdev_disk.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -657,7 +657,7 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
657657
return (0);
658658
}
659659

660-
static int
660+
static void
661661
vdev_disk_io_start(zio_t *zio)
662662
{
663663
vdev_t *v = zio->io_vd;
@@ -669,7 +669,8 @@ vdev_disk_io_start(zio_t *zio)
669669

670670
if (!vdev_readable(v)) {
671671
zio->io_error = SET_ERROR(ENXIO);
672-
return (ZIO_PIPELINE_CONTINUE);
672+
zio_interrupt(zio);
673+
return;
673674
}
674675

675676
switch (zio->io_cmd) {
@@ -685,7 +686,7 @@ vdev_disk_io_start(zio_t *zio)
685686

686687
error = vdev_disk_io_flush(vd->vd_bdev, zio);
687688
if (error == 0)
688-
return (ZIO_PIPELINE_STOP);
689+
return;
689690

690691
zio->io_error = error;
691692
if (error == ENOTSUP)
@@ -697,8 +698,8 @@ vdev_disk_io_start(zio_t *zio)
697698
zio->io_error = SET_ERROR(ENOTSUP);
698699
}
699700

700-
return (ZIO_PIPELINE_CONTINUE);
701-
701+
zio_execute(zio);
702+
return;
702703
case ZIO_TYPE_WRITE:
703704
flags = WRITE;
704705
break;
@@ -709,17 +710,17 @@ vdev_disk_io_start(zio_t *zio)
709710

710711
default:
711712
zio->io_error = SET_ERROR(ENOTSUP);
712-
return (ZIO_PIPELINE_CONTINUE);
713+
zio_interrupt(zio);
714+
return;
713715
}
714716

715717
error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data,
716718
zio->io_size, zio->io_offset, flags);
717719
if (error) {
718720
zio->io_error = error;
719-
return (ZIO_PIPELINE_CONTINUE);
721+
zio_interrupt(zio);
722+
return;
720723
}
721-
722-
return (ZIO_PIPELINE_STOP);
723724
}
724725

725726
static void

module/zfs/vdev_file.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23-
* Copyright (c) 2013 by Delphix. All rights reserved.
23+
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
2424
*/
2525

2626
#include <sys/zfs_context.h>
@@ -172,7 +172,7 @@ vdev_file_io_fsync(void *arg)
172172
zio_interrupt(zio);
173173
}
174174

175-
static int
175+
static void
176176
vdev_file_io_start(zio_t *zio)
177177
{
178178
vdev_t *vd = zio->io_vd;
@@ -182,7 +182,8 @@ vdev_file_io_start(zio_t *zio)
182182
/* XXPOLICY */
183183
if (!vdev_readable(vd)) {
184184
zio->io_error = SET_ERROR(ENXIO);
185-
return (ZIO_PIPELINE_CONTINUE);
185+
zio_interrupt(zio);
186+
return;
186187
}
187188

188189
switch (zio->io_cmd) {
@@ -201,7 +202,7 @@ vdev_file_io_start(zio_t *zio)
201202
if (spl_fstrans_check()) {
202203
VERIFY3U(taskq_dispatch(vdev_file_taskq,
203204
vdev_file_io_fsync, zio, TQ_SLEEP), !=, 0);
204-
return (ZIO_PIPELINE_STOP);
205+
return;
205206
}
206207

207208
zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
@@ -211,13 +212,12 @@ vdev_file_io_start(zio_t *zio)
211212
zio->io_error = SET_ERROR(ENOTSUP);
212213
}
213214

214-
return (ZIO_PIPELINE_CONTINUE);
215+
zio_execute(zio);
216+
return;
215217
}
216218

217219
VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
218220
TQ_SLEEP), !=, 0);
219-
220-
return (ZIO_PIPELINE_STOP);
221221
}
222222

223223
/* ARGSUSED */

module/zfs/vdev_mirror.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
*/
2525

2626
/*
27-
* Copyright (c) 2013 by Delphix. All rights reserved.
27+
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
2828
*/
2929

3030
#include <sys/zfs_context.h>
@@ -327,7 +327,7 @@ vdev_mirror_child_select(zio_t *zio)
327327
return (-1);
328328
}
329329

330-
static int
330+
static void
331331
vdev_mirror_io_start(zio_t *zio)
332332
{
333333
mirror_map_t *mm;
@@ -352,7 +352,8 @@ vdev_mirror_io_start(zio_t *zio)
352352
zio->io_type, zio->io_priority, 0,
353353
vdev_mirror_scrub_done, mc));
354354
}
355-
return (ZIO_PIPELINE_CONTINUE);
355+
zio_execute(zio);
356+
return;
356357
}
357358
/*
358359
* For normal reads just pick one child.
@@ -378,7 +379,7 @@ vdev_mirror_io_start(zio_t *zio)
378379
c++;
379380
}
380381

381-
return (ZIO_PIPELINE_CONTINUE);
382+
zio_execute(zio);
382383
}
383384

384385
static int

module/zfs/vdev_missing.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
*/
2525

2626
/*
27-
* Copyright (c) 2013 by Delphix. All rights reserved.
27+
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
2828
*/
2929

3030
/*
@@ -66,11 +66,11 @@ vdev_missing_close(vdev_t *vd)
6666
}
6767

6868
/* ARGSUSED */
69-
static int
69+
static void
7070
vdev_missing_io_start(zio_t *zio)
7171
{
7272
zio->io_error = SET_ERROR(ENOTSUP);
73-
return (ZIO_PIPELINE_CONTINUE);
73+
zio_execute(zio);
7474
}
7575

7676
/* ARGSUSED */

module/zfs/vdev_raidz.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24-
* Copyright (c) 2013 by Delphix. All rights reserved.
24+
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
2525
*/
2626

2727
#include <sys/zfs_context.h>
@@ -1567,7 +1567,7 @@ vdev_raidz_child_done(zio_t *zio)
15671567
* vdevs have had errors, then create zio read operations to the parity
15681568
* columns' VDevs as well.
15691569
*/
1570-
static int
1570+
static void
15711571
vdev_raidz_io_start(zio_t *zio)
15721572
{
15731573
vdev_t *vd = zio->io_vd;
@@ -1611,7 +1611,8 @@ vdev_raidz_io_start(zio_t *zio)
16111611
ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL));
16121612
}
16131613

1614-
return (ZIO_PIPELINE_CONTINUE);
1614+
zio_execute(zio);
1615+
return;
16151616
}
16161617

16171618
ASSERT(zio->io_type == ZIO_TYPE_READ);
@@ -1651,7 +1652,7 @@ vdev_raidz_io_start(zio_t *zio)
16511652
}
16521653
}
16531654

1654-
return (ZIO_PIPELINE_CONTINUE);
1655+
zio_execute(zio);
16551656
}
16561657

16571658

module/zfs/zio.c

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
5959
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
6060
int zio_delay_max = ZIO_DELAY_MAX;
6161

62+
#define ZIO_PIPELINE_CONTINUE 0x100
63+
#define ZIO_PIPELINE_STOP 0x101
64+
6265
/*
6366
* The following actions directly affect the spa's sync-to-convergence logic.
6467
* The values below define the sync pass when we start performing the action.
@@ -2526,6 +2529,18 @@ zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp)
25262529
* Read and write to physical devices
25272530
* ==========================================================================
25282531
*/
2532+
2533+
2534+
/*
2535+
* Issue an I/O to the underlying vdev. Typically the issue pipeline
2536+
* stops after this stage and will resume upon I/O completion.
2537+
* However, there are instances where the vdev layer may need to
2538+
* continue the pipeline when an I/O was not issued. Since the I/O
2539+
* that was sent to the vdev layer might be different than the one
2540+
* currently active in the pipeline (see vdev_queue_io()), we explicitly
2541+
* force the underlying vdev layers to call either zio_execute() or
2542+
* zio_interrupt() to ensure that the pipeline continues with the correct I/O.
2543+
*/
25292544
static int
25302545
zio_vdev_io_start(zio_t *zio)
25312546
{
@@ -2543,15 +2558,16 @@ zio_vdev_io_start(zio_t *zio)
25432558
/*
25442559
* The mirror_ops handle multiple DVAs in a single BP.
25452560
*/
2546-
return (vdev_mirror_ops.vdev_op_io_start(zio));
2561+
vdev_mirror_ops.vdev_op_io_start(zio);
2562+
return (ZIO_PIPELINE_STOP);
25472563
}
25482564

25492565
/*
25502566
* We keep track of time-sensitive I/Os so that the scan thread
25512567
* can quickly react to certain workloads. In particular, we care
25522568
* about non-scrubbing, top-level reads and writes with the following
25532569
* characteristics:
2554-
* - synchronous writes of user data to non-slog devices
2570+
* - synchronous writes of user data to non-slog devices
25552571
* - any reads of user data
25562572
* When these conditions are met, adjust the timestamp of spa_last_io
25572573
* which allows the scan thread to adjust its workload accordingly.
@@ -2637,7 +2653,8 @@ zio_vdev_io_start(zio_t *zio)
26372653
}
26382654
}
26392655

2640-
return (vd->vdev_ops->vdev_op_io_start(zio));
2656+
vd->vdev_ops->vdev_op_io_start(zio);
2657+
return (ZIO_PIPELINE_STOP);
26412658
}
26422659

26432660
static int

0 commit comments

Comments
 (0)