@@ -190,6 +190,22 @@ vdev_queue_offset_compare(const void *x1, const void *x2)
 	return (0);
 }
 
+static inline avl_tree_t *
+vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p)
+{
+	return (&vq->vq_class[p].vqc_queued_tree);
+}
+
+static inline avl_tree_t *
+vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t)
+{
+	ASSERT(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE);
+	if (t == ZIO_TYPE_READ)
+		return (&vq->vq_read_offset_tree);
+	else
+		return (&vq->vq_write_offset_tree);
+}
+
 int
 vdev_queue_timestamp_compare(const void *x1, const void *x2)
 {
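
Note: the two accessors above centralize every tree lookup, and they imply companion structure changes that are not shown in this hunk. A minimal sketch of what the accompanying header presumably declares (the names vq_read_offset_tree, vq_write_offset_tree, and io_offset_node are taken from this diff; exact placement is assumed):

	/* In vdev_queue_t (sketch; placement assumed): */
	vdev_queue_class_t	vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
	avl_tree_t		vq_active_tree;
	avl_tree_t		vq_read_offset_tree;	/* new: all reads, LBA-sorted */
	avl_tree_t		vq_write_offset_tree;	/* new: all writes, LBA-sorted */

	/* In struct zio (sketch): a second linkage node, since a queued
	 * zio now sits on one class tree and one type tree at once. */
	avl_node_t		io_queue_node;		/* class-tree linkage */
	avl_node_t		io_offset_node;		/* new: type-tree linkage */
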
@@ -303,7 +319,7 @@ vdev_queue_class_to_issue(vdev_queue_t *vq)
 
 	/* find a queue that has not reached its minimum # outstanding i/os */
 	for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
-		if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
+		if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
 		    vq->vq_class[p].vqc_active <
 		    vdev_queue_class_min_active(p))
 			return (p);
@@ -314,7 +330,7 @@ vdev_queue_class_to_issue(vdev_queue_t *vq)
 	 * maximum # outstanding i/os.
 	 */
 	for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
-		if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
+		if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
 		    vq->vq_class[p].vqc_active <
 		    vdev_queue_class_max_active(spa, p))
 			return (p);
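
Both loops are purely mechanical substitutions through vdev_queue_class_tree(); the two-pass structure is unchanged: pass one guarantees every class its minimum share before pass two distributes the remaining slots up to each maximum.

	/*
	 * Worked example (hypothetical limits, not from this diff):
	 * suppose sync reads have min/max = 10/10 and async writes
	 * have min/max = 1/10.  With zios queued in both classes and
	 * 4 sync reads but only 1 async write active, pass one picks
	 * sync read (4 < its minimum of 10); only once every class
	 * has met its minimum does pass two hand out further slots.
	 */
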
@@ -335,20 +351,27 @@ vdev_queue_init(vdev_t *vd)
 
 	avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
 	    sizeof (zio_t), offsetof(struct zio, io_queue_node));
+	avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ),
+	    vdev_queue_offset_compare, sizeof (zio_t),
+	    offsetof(struct zio, io_offset_node));
+	avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE),
+	    vdev_queue_offset_compare, sizeof (zio_t),
+	    offsetof(struct zio, io_offset_node));
 
 	for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
+		int (*compfn) (const void *, const void *);
+
 		/*
-		 * The synchronous i/o queues are FIFO rather than LBA ordered.
-		 * This provides more consistent latency for these i/os, and
-		 * they tend to not be tightly clustered anyway so there is
-		 * little to no throughput loss.
+		 * The synchronous i/o queues are dispatched in FIFO rather
+		 * than LBA order. This provides more consistent latency for
+		 * these i/os.
 		 */
-		boolean_t fifo = (p == ZIO_PRIORITY_SYNC_READ ||
-		    p == ZIO_PRIORITY_SYNC_WRITE);
-		avl_create(&vq->vq_class[p].vqc_queued_tree,
-		    fifo ? vdev_queue_timestamp_compare :
-		    vdev_queue_offset_compare,
-		    sizeof (zio_t), offsetof(struct zio, io_queue_node));
+		if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE)
+			compfn = vdev_queue_timestamp_compare;
+		else
+			compfn = vdev_queue_offset_compare;
+		avl_create(vdev_queue_class_tree(vq, p), compfn,
+		    sizeof (zio_t), offsetof(struct zio, io_queue_node));
 	}
 }
 
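
vdev_queue_init() now builds three kinds of trees from two comparators. The comparators themselves sit outside this diff; a sketch of their assumed shape, consistent with the `return (0);` tail of vdev_queue_offset_compare visible in the first hunk:

	/*
	 * Sketch of the comparator shape assumed by the avl_create()
	 * calls above.  The final pointer comparison is a tie-breaker:
	 * the AVL tree needs a total order, so two zios with equal
	 * offsets (or timestamps) must still compare unequal.
	 */
	int
	vdev_queue_offset_compare(const void *x1, const void *x2)
	{
		const zio_t *z1 = x1;
		const zio_t *z2 = x2;

		if (z1->io_offset < z2->io_offset)
			return (-1);
		if (z1->io_offset > z2->io_offset)
			return (1);

		if (z1 < z2)
			return (-1);
		if (z1 > z2)
			return (1);

		return (0);
	}

vdev_queue_timestamp_compare() is assumed to have the same shape over io_timestamp.
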
@@ -359,8 +382,10 @@ vdev_queue_fini(vdev_t *vd)
 	zio_priority_t p;
 
 	for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
-		avl_destroy(&vq->vq_class[p].vqc_queued_tree);
+		avl_destroy(vdev_queue_class_tree(vq, p));
 	avl_destroy(&vq->vq_active_tree);
+	avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ));
+	avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE));
 
 	mutex_destroy(&vq->vq_lock);
 }
@@ -372,7 +397,8 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
 	spa_stats_history_t *ssh = &spa->spa_stats.io_history;
 
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
-	avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
+	avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
+	avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
 
 	if (ssh->kstat != NULL) {
 		mutex_enter(&ssh->lock);
@@ -388,7 +414,8 @@ vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
 	spa_stats_history_t *ssh = &spa->spa_stats.io_history;
 
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
-	avl_remove(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
+	avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
+	avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
 
 	if (ssh->kstat != NULL) {
 		mutex_enter(&ssh->lock);
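
Add and remove are now symmetric across both trees, so every queued zio is linked into exactly one class tree and exactly one type tree. A hypothetical debug helper (not part of the patch) makes the invariant explicit:

	/* Hypothetical consistency check; not in the patch. */
	static void
	vdev_queue_verify_trees(vdev_queue_t *vq)
	{
		uint64_t by_class = 0, by_type;
		zio_priority_t p;

		ASSERT(MUTEX_HELD(&vq->vq_lock));
		for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
			by_class += avl_numnodes(vdev_queue_class_tree(vq, p));
		by_type = avl_numnodes(vdev_queue_type_tree(vq, ZIO_TYPE_READ)) +
		    avl_numnodes(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE));
		ASSERT3U(by_class, ==, by_type);
	}
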
@@ -472,8 +499,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
 	uint64_t maxgap = 0;
 	uint64_t size;
 	boolean_t stretch = B_FALSE;
-	vdev_queue_class_t *vqc = &vq->vq_class[zio->io_priority];
-	avl_tree_t *t = &vqc->vqc_queued_tree;
+	avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type);
 	enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
 
 	if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
@@ -486,15 +512,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
 	zfs_vdev_aggregation_limit =
 	    MIN(zfs_vdev_aggregation_limit, SPA_MAXBLOCKSIZE);
 
-	/*
-	 * The synchronous i/o queues are not sorted by LBA, so we can't
-	 * find adjacent i/os.  These i/os tend to not be tightly clustered,
-	 * or too large to aggregate, so this has little impact on performance.
-	 */
-	if (zio->io_priority == ZIO_PRIORITY_SYNC_READ ||
-	    zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
-		return (NULL);
-
 	first = last = zio;
 
 	if (zio->io_type == ZIO_TYPE_READ)
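
This deletion is the point of the patch: the type trees are LBA-sorted regardless of priority, so synchronous i/os no longer have to opt out of aggregation. The neighbor walk that follows (elided from this diff) can now find adjacent i/os for every class; its assumed shape, using the AVL neighbor macros:

	/*
	 * Assumed shape of the elided aggregation scan.  IO_SPAN() and
	 * IO_GAP() are taken to be helper macros over io_offset and
	 * io_size; the real predicate also handles optional gaps and
	 * the "stretch" logic.
	 */
	zio_t *dio;

	while ((dio = AVL_PREV(t, first)) != NULL &&
	    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
	    IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit &&
	    IO_GAP(dio, first) <= maxgap)
		first = dio;

	while ((dio = AVL_NEXT(t, last)) != NULL &&
	    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
	    IO_SPAN(first, dio) <= zfs_vdev_aggregation_limit &&
	    IO_GAP(last, dio) <= maxgap)
		last = dio;
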
@@ -627,7 +644,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq)
 	zio_t *zio, *aio;
 	zio_priority_t p;
 	avl_index_t idx;
-	vdev_queue_class_t *vqc;
+	avl_tree_t *tree;
 
 again:
 	ASSERT(MUTEX_HELD(&vq->vq_lock));
@@ -645,14 +662,14 @@ vdev_queue_io_to_issue(vdev_queue_t *vq)
 	 *
 	 * For FIFO queues (sync), issue the i/o with the lowest timestamp.
 	 */
-	vqc = &vq->vq_class[p];
+	tree = vdev_queue_class_tree(vq, p);
 	vq->vq_io_search.io_timestamp = 0;
 	vq->vq_io_search.io_offset = vq->vq_last_offset + 1;
-	VERIFY3P(avl_find(&vqc->vqc_queued_tree, &vq->vq_io_search,
+	VERIFY3P(avl_find(tree, &vq->vq_io_search,
 	    &idx), ==, NULL);
-	zio = avl_nearest(&vqc->vqc_queued_tree, idx, AVL_AFTER);
+	zio = avl_nearest(tree, idx, AVL_AFTER);
 	if (zio == NULL)
-		zio = avl_first(&vqc->vqc_queued_tree);
+		zio = avl_first(tree);
 	ASSERT3U(zio->io_priority, ==, p);
 
 	aio = vdev_queue_aggregate(vq, zio);
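
The same search works for both orderings because vq_io_search is a dummy zio: in a FIFO (timestamp-sorted) class tree its zeroed io_timestamp sorts before every real entry, so AVL_AFTER yields the oldest queued i/o; in an LBA-sorted tree it yields the first i/o past the last issued offset, falling back to avl_first() to wrap the elevator scan.

	/*
	 * Worked example (hypothetical offsets), LBA-sorted tree with
	 * queued offsets {100, 400, 900} and vq_last_offset == 400:
	 *   io_search.io_offset = 401 -> avl_nearest(tree, idx, AVL_AFTER) == 900
	 * After issuing 900, vq_last_offset == 900:
	 *   io_search.io_offset = 901 -> avl_nearest() == NULL
	 *   -> avl_first(tree) == 100, and the scan wraps around.
	 */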