Skip to content

Commit a38718a

Browse files
Garrett D'Amore authored and behlendorf committed
Illumos #734: Use taskq_dispatch_ent() interface
It has been observed that some of the hottest locks are those of the zio taskqs. Contention on these locks can limit the rate at which zios are dispatched, which limits performance.

This upstream change from Illumos uses a new interface to the taskqs which allows them to utilize a prealloc'ed taskq_ent_t. This removes the need to perform an allocation at dispatch time while holding the contended lock. This has the effect of improving system performance.

Reviewed by: Albert Lee <trisk@nexenta.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Reviewed by: Alexey Zaytsev <alexey.zaytsev@nexenta.com>
Reviewed by: Jason Brian King <jason.brian.king@gmail.com>
Reviewed by: George Wilson <gwilson@zfsmail.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Approved by: Gordon Ross <gwr@nexenta.com>

References to Illumos issue:
https://www.illumos.org/issues/734

Ported-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #482
1 parent 30a9524 commit a38718a

File tree

5 files changed

+134
-41
lines changed

5 files changed

+134
-41
lines changed

include/sys/zfs_context.h

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@
2222
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
2323
* Use is subject to license terms.
2424
*/
25+
/*
26+
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27+
*/
2528

2629
#ifndef _SYS_ZFS_CONTEXT_H
2730
#define _SYS_ZFS_CONTEXT_H
@@ -365,6 +368,16 @@ typedef struct taskq taskq_t;
365368
typedef uintptr_t taskqid_t;
366369
typedef void (task_func_t)(void *);
367370

371+
typedef struct taskq_ent {
372+
struct taskq_ent *tqent_next;
373+
struct taskq_ent *tqent_prev;
374+
task_func_t *tqent_func;
375+
void *tqent_arg;
376+
uintptr_t tqent_flags;
377+
} taskq_ent_t;
378+
379+
#define TQENT_FLAG_PREALLOC 0x1 /* taskq_dispatch_ent used */
380+
368381
#define TASKQ_PREPOPULATE 0x0001
369382
#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
370383
#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
@@ -385,6 +398,10 @@ extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
385398
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
386399
(taskq_create(a, b, maxclsyspri, d, e, f))
387400
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
401+
extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
402+
taskq_ent_t *);
403+
extern int taskq_empty_ent(taskq_ent_t *);
404+
extern void taskq_init_ent(taskq_ent_t *);
388405
extern void taskq_destroy(taskq_t *);
389406
extern void taskq_wait(taskq_t *);
390407
extern int taskq_member(taskq_t *, kthread_t *);

include/sys/zio.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@
2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2424
*/
25+
/*
26+
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27+
*/
2528

2629
#ifndef _ZIO_H
2730
#define _ZIO_H
@@ -423,6 +426,9 @@ struct zio {
423426
/* FMA state */
424427
zio_cksum_report_t *io_cksum_report;
425428
uint64_t io_ena;
429+
430+
/* Taskq dispatching state */
431+
taskq_ent_t io_tqent;
426432
};
427433

428434
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,

lib/libzpool/taskq.c

Lines changed: 91 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -22,19 +22,15 @@
2222
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
2323
* Use is subject to license terms.
2424
*/
25+
/*
26+
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27+
*/
2528

2629
#include <sys/zfs_context.h>
2730

2831
int taskq_now;
2932
taskq_t *system_taskq;
3033

31-
typedef struct task {
32-
struct task *task_next;
33-
struct task *task_prev;
34-
task_func_t *task_func;
35-
void *task_arg;
36-
} task_t;
37-
3834
#define TASKQ_ACTIVE 0x00010000
3935

4036
struct taskq {
@@ -51,18 +47,19 @@ struct taskq {
5147
int tq_maxalloc;
5248
kcondvar_t tq_maxalloc_cv;
5349
int tq_maxalloc_wait;
54-
task_t *tq_freelist;
55-
task_t tq_task;
50+
taskq_ent_t *tq_freelist;
51+
taskq_ent_t tq_task;
5652
};
5753

58-
static task_t *
54+
static taskq_ent_t *
5955
task_alloc(taskq_t *tq, int tqflags)
6056
{
61-
task_t *t;
57+
taskq_ent_t *t;
6258
int rv;
6359

6460
again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
65-
tq->tq_freelist = t->task_next;
61+
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
62+
tq->tq_freelist = t->tqent_next;
6663
} else {
6764
if (tq->tq_nalloc >= tq->tq_maxalloc) {
6865
if (!(tqflags & KM_SLEEP))
@@ -87,25 +84,28 @@ again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
8784
}
8885
mutex_exit(&tq->tq_lock);
8986

90-
t = kmem_alloc(sizeof (task_t), tqflags);
87+
t = kmem_alloc(sizeof (taskq_ent_t), tqflags);
9188

9289
mutex_enter(&tq->tq_lock);
93-
if (t != NULL)
90+
if (t != NULL) {
91+
/* Make sure we start without any flags */
92+
t->tqent_flags = 0;
9493
tq->tq_nalloc++;
94+
}
9595
}
9696
return (t);
9797
}
9898

9999
static void
100-
task_free(taskq_t *tq, task_t *t)
100+
task_free(taskq_t *tq, taskq_ent_t *t)
101101
{
102102
if (tq->tq_nalloc <= tq->tq_minalloc) {
103-
t->task_next = tq->tq_freelist;
103+
t->tqent_next = tq->tq_freelist;
104104
tq->tq_freelist = t;
105105
} else {
106106
tq->tq_nalloc--;
107107
mutex_exit(&tq->tq_lock);
108-
kmem_free(t, sizeof (task_t));
108+
kmem_free(t, sizeof (taskq_ent_t));
109109
mutex_enter(&tq->tq_lock);
110110
}
111111

@@ -116,7 +116,7 @@ task_free(taskq_t *tq, task_t *t)
116116
taskqid_t
117117
taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
118118
{
119-
task_t *t;
119+
taskq_ent_t *t;
120120

121121
if (taskq_now) {
122122
func(arg);
@@ -130,26 +130,77 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
130130
return (0);
131131
}
132132
if (tqflags & TQ_FRONT) {
133-
t->task_next = tq->tq_task.task_next;
134-
t->task_prev = &tq->tq_task;
133+
t->tqent_next = tq->tq_task.tqent_next;
134+
t->tqent_prev = &tq->tq_task;
135135
} else {
136-
t->task_next = &tq->tq_task;
137-
t->task_prev = tq->tq_task.task_prev;
136+
t->tqent_next = &tq->tq_task;
137+
t->tqent_prev = tq->tq_task.tqent_prev;
138138
}
139-
t->task_next->task_prev = t;
140-
t->task_prev->task_next = t;
141-
t->task_func = func;
142-
t->task_arg = arg;
139+
t->tqent_next->tqent_prev = t;
140+
t->tqent_prev->tqent_next = t;
141+
t->tqent_func = func;
142+
t->tqent_arg = arg;
143+
144+
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
145+
143146
cv_signal(&tq->tq_dispatch_cv);
144147
mutex_exit(&tq->tq_lock);
145148
return (1);
146149
}
147150

151+
int
152+
taskq_empty_ent(taskq_ent_t *t)
153+
{
154+
return t->tqent_next == NULL;
155+
}
156+
157+
void
158+
taskq_init_ent(taskq_ent_t *t)
159+
{
160+
t->tqent_next = NULL;
161+
t->tqent_prev = NULL;
162+
t->tqent_func = NULL;
163+
t->tqent_arg = NULL;
164+
t->tqent_flags = 0;
165+
}
166+
167+
void
168+
taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
169+
taskq_ent_t *t)
170+
{
171+
ASSERT(func != NULL);
172+
ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
173+
174+
/*
175+
* Mark it as a prealloc'd task. This is important
176+
* to ensure that we don't free it later.
177+
*/
178+
t->tqent_flags |= TQENT_FLAG_PREALLOC;
179+
/*
180+
* Enqueue the task to the underlying queue.
181+
*/
182+
mutex_enter(&tq->tq_lock);
183+
184+
if (flags & TQ_FRONT) {
185+
t->tqent_next = tq->tq_task.tqent_next;
186+
t->tqent_prev = &tq->tq_task;
187+
} else {
188+
t->tqent_next = &tq->tq_task;
189+
t->tqent_prev = tq->tq_task.tqent_prev;
190+
}
191+
t->tqent_next->tqent_prev = t;
192+
t->tqent_prev->tqent_next = t;
193+
t->tqent_func = func;
194+
t->tqent_arg = arg;
195+
cv_signal(&tq->tq_dispatch_cv);
196+
mutex_exit(&tq->tq_lock);
197+
}
198+
148199
void
149200
taskq_wait(taskq_t *tq)
150201
{
151202
mutex_enter(&tq->tq_lock);
152-
while (tq->tq_task.task_next != &tq->tq_task || tq->tq_active != 0)
203+
while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0)
153204
cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
154205
mutex_exit(&tq->tq_lock);
155206
}
@@ -158,27 +209,32 @@ static void
158209
taskq_thread(void *arg)
159210
{
160211
taskq_t *tq = arg;
161-
task_t *t;
212+
taskq_ent_t *t;
213+
boolean_t prealloc;
162214

163215
mutex_enter(&tq->tq_lock);
164216
while (tq->tq_flags & TASKQ_ACTIVE) {
165-
if ((t = tq->tq_task.task_next) == &tq->tq_task) {
217+
if ((t = tq->tq_task.tqent_next) == &tq->tq_task) {
166218
if (--tq->tq_active == 0)
167219
cv_broadcast(&tq->tq_wait_cv);
168220
cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock);
169221
tq->tq_active++;
170222
continue;
171223
}
172-
t->task_prev->task_next = t->task_next;
173-
t->task_next->task_prev = t->task_prev;
224+
t->tqent_prev->tqent_next = t->tqent_next;
225+
t->tqent_next->tqent_prev = t->tqent_prev;
226+
t->tqent_next = NULL;
227+
t->tqent_prev = NULL;
228+
prealloc = t->tqent_flags & TQENT_FLAG_PREALLOC;
174229
mutex_exit(&tq->tq_lock);
175230

176231
rw_enter(&tq->tq_threadlock, RW_READER);
177-
t->task_func(t->task_arg);
232+
t->tqent_func(t->tqent_arg);
178233
rw_exit(&tq->tq_threadlock);
179234

180235
mutex_enter(&tq->tq_lock);
181-
task_free(tq, t);
236+
if (!prealloc)
237+
task_free(tq, t);
182238
}
183239
tq->tq_nthreads--;
184240
cv_broadcast(&tq->tq_wait_cv);
@@ -217,8 +273,8 @@ taskq_create(const char *name, int nthreads, pri_t pri,
217273
tq->tq_nthreads = nthreads;
218274
tq->tq_minalloc = minalloc;
219275
tq->tq_maxalloc = maxalloc;
220-
tq->tq_task.task_next = &tq->tq_task;
221-
tq->tq_task.task_prev = &tq->tq_task;
276+
tq->tq_task.tqent_next = &tq->tq_task;
277+
tq->tq_task.tqent_prev = &tq->tq_task;
222278
tq->tq_threadlist = kmem_alloc(nthreads*sizeof(kthread_t *), KM_SLEEP);
223279

224280
if (flags & TASKQ_PREPOPULATE) {

module/zfs/spa.c

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@
2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2424
*/
25+
/*
26+
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27+
*/
2528

2629
/*
2730
* This file contains all the routines used when modifying on-disk SPA state.
@@ -665,7 +668,7 @@ spa_create_zio_taskqs(spa_t *spa)
665668
const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
666669
enum zti_modes mode = ztip->zti_mode;
667670
uint_t value = ztip->zti_value;
668-
uint_t flags = TASKQ_PREPOPULATE;
671+
uint_t flags = 0;
669672
char name[32];
670673

671674
if (t == ZIO_TYPE_WRITE)

module/zfs/zio.c

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2323
* Copyright (c) 2011 by Delphix. All rights reserved.
24+
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
2425
*/
2526

2627
#include <sys/zfs_context.h>
@@ -570,6 +571,8 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
570571
zio_add_child(pio, zio);
571572
}
572573

574+
taskq_init_ent(&zio->io_tqent);
575+
573576
return (zio);
574577
}
575578

@@ -1073,7 +1076,7 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
10731076
{
10741077
spa_t *spa = zio->io_spa;
10751078
zio_type_t t = zio->io_type;
1076-
int flags = TQ_NOSLEEP | (cutinline ? TQ_FRONT : 0);
1079+
int flags = (cutinline ? TQ_FRONT : 0);
10771080

10781081
/*
10791082
* If we're a config writer or a probe, the normal issue and
@@ -1098,8 +1101,14 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
10981101

10991102
ASSERT3U(q, <, ZIO_TASKQ_TYPES);
11001103

1101-
while (taskq_dispatch(spa->spa_zio_taskq[t][q],
1102-
(task_func_t *)zio_execute, zio, flags) == 0); /* do nothing */
1104+
/*
1105+
* NB: We are assuming that the zio can only be dispatched
1106+
* to a single taskq at a time. It would be a grievous error
1107+
* to dispatch the zio to another taskq at the same time.
1108+
*/
1109+
ASSERT(taskq_empty_ent(&zio->io_tqent));
1110+
taskq_dispatch_ent(spa->spa_zio_taskq[t][q],
1111+
(task_func_t *)zio_execute, zio, flags, &zio->io_tqent);
11031112
}
11041113

11051114
static boolean_t
@@ -2947,9 +2956,11 @@ zio_done(zio_t *zio)
29472956
* Reexecution is potentially a huge amount of work.
29482957
* Hand it off to the otherwise-unused claim taskq.
29492958
*/
2950-
(void) taskq_dispatch(
2959+
ASSERT(taskq_empty_ent(&zio->io_tqent));
2960+
(void) taskq_dispatch_ent(
29512961
zio->io_spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
2952-
(task_func_t *)zio_reexecute, zio, TQ_SLEEP);
2962+
(task_func_t *)zio_reexecute, zio, 0,
2963+
&zio->io_tqent);
29532964
}
29542965
return (ZIO_PIPELINE_STOP);
29552966
}

0 commit comments

Comments
 (0)