Skip to content

Commit 044baf0

Browse files
committed
Use taskq for dump_bytes()
The vn_rdwr() function performs I/O by calling the vfs_write() or vfs_read() functions. These functions reside just below the system call layer, and the expectation is that they have almost the entire 8k of stack space to work with. In fact, certain layered configurations such as ext+lvm+md+multipath require the majority of this stack to avoid stack overflows. To avoid this possibility, the vn_rdwr() call in dump_bytes() has been moved to the ZIO_TYPE_FREE taskq. This ensures that all I/O will be performed with the majority of the stack space available. This ends up being very similar to the I/O being issued via sys_write() or sys_read(). Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #1399 Closes #1423
1 parent 7ef5e54 commit 044baf0

File tree

5 files changed

+69
-6
lines changed

5 files changed

+69
-6
lines changed

include/sys/spa_impl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,9 @@ extern char *spa_config_path;
250250

251251
extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
252252
task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent);
253+
extern void spa_taskq_dispatch_sync(spa_t *, zio_type_t t, zio_taskq_type_t q,
254+
task_func_t *func, void *arg, uint_t flags);
255+
253256

254257
#ifdef __cplusplus
255258
}

include/sys/zfs_context.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,7 @@ extern int taskq_empty_ent(taskq_ent_t *);
409409
extern void taskq_init_ent(taskq_ent_t *);
410410
extern void taskq_destroy(taskq_t *);
411411
extern void taskq_wait(taskq_t *);
412+
extern void taskq_wait_id(taskq_t *, taskqid_t);
412413
extern int taskq_member(taskq_t *, kthread_t *);
413414
extern int taskq_cancel_id(taskq_t *, taskqid_t);
414415
extern void system_taskq_init(void);

lib/libzpool/taskq.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,12 @@ taskq_wait(taskq_t *tq)
211211
mutex_exit(&tq->tq_lock);
212212
}
213213

214+
void
215+
taskq_wait_id(taskq_t *tq, taskqid_t id)
216+
{
217+
taskq_wait(tq);
218+
}
219+
214220
static void
215221
taskq_thread(void *arg)
216222
{

module/zfs/dmu_send.c

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <sys/dsl_prop.h>
4040
#include <sys/dsl_pool.h>
4141
#include <sys/dsl_synctask.h>
42+
#include <sys/spa_impl.h>
4243
#include <sys/zfs_ioctl.h>
4344
#include <sys/zap.h>
4445
#include <sys/zio_checksum.h>
@@ -53,21 +54,48 @@ int zfs_send_corrupt_data = B_FALSE;
5354

5455
static char *dmu_recv_tag = "dmu_recv_tag";
5556

56-
static int
57-
dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
57+
typedef struct dump_bytes_io {
58+
dmu_sendarg_t *dbi_dsp;
59+
void *dbi_buf;
60+
int dbi_len;
61+
} dump_bytes_io_t;
62+
63+
static void
64+
dump_bytes_strategy(void *arg)
5865
{
66+
dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
67+
dmu_sendarg_t *dsp = dbi->dbi_dsp;
5968
dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
6069
ssize_t resid; /* have to get resid to get detailed errno */
61-
ASSERT3U(len % 8, ==, 0);
70+
ASSERT3U(dbi->dbi_len % 8, ==, 0);
6271

63-
fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
72+
fletcher_4_incremental_native(dbi->dbi_buf, dbi->dbi_len, &dsp->dsa_zc);
6473
dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
65-
(caddr_t)buf, len,
74+
(caddr_t)dbi->dbi_buf, dbi->dbi_len,
6675
0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
6776

6877
mutex_enter(&ds->ds_sendstream_lock);
69-
*dsp->dsa_off += len;
78+
*dsp->dsa_off += dbi->dbi_len;
7079
mutex_exit(&ds->ds_sendstream_lock);
80+
}
81+
82+
static int
83+
dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
84+
{
85+
dump_bytes_io_t dbi;
86+
87+
dbi.dbi_dsp = dsp;
88+
dbi.dbi_buf = buf;
89+
dbi.dbi_len = len;
90+
91+
/*
92+
* The vn_rdwr() call is performed in a taskq to ensure that there is
93+
* always enough stack space to write safely to the target filesystem.
94+
* The ZIO_TYPE_FREE threads are used because there can be a lot of
95+
* them and they are used in vdev_file.c for a similar purpose.
96+
*/
97+
spa_taskq_dispatch_sync(dmu_objset_spa(dsp->dsa_os), ZIO_TYPE_FREE,
98+
ZIO_TASKQ_ISSUE, dump_bytes_strategy, &dbi, TQ_SLEEP);
7199

72100
return (dsp->dsa_err);
73101
}

module/zfs/spa.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,31 @@ spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
926926
taskq_dispatch_ent(tq, func, arg, flags, ent);
927927
}
928928

929+
/*
930+
* Same as spa_taskq_dispatch_ent() but block on the task until completion.
931+
*/
932+
void
933+
spa_taskq_dispatch_sync(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
934+
task_func_t *func, void *arg, uint_t flags)
935+
{
936+
spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
937+
taskq_t *tq;
938+
taskqid_t id;
939+
940+
ASSERT3P(tqs->stqs_taskq, !=, NULL);
941+
ASSERT3U(tqs->stqs_count, !=, 0);
942+
943+
if (tqs->stqs_count == 1) {
944+
tq = tqs->stqs_taskq[0];
945+
} else {
946+
tq = tqs->stqs_taskq[gethrtime() % tqs->stqs_count];
947+
}
948+
949+
id = taskq_dispatch(tq, func, arg, flags);
950+
if (id)
951+
taskq_wait_id(tq, id);
952+
}
953+
929954
static void
930955
spa_create_zio_taskqs(spa_t *spa)
931956
{

0 commit comments

Comments
 (0)