@@ -24,6 +24,7 @@
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -48,13 +49,24 @@
 #include <sys/sa.h>
 #include <sys/zfs_onexit.h>
 #include <sys/dsl_destroy.h>
+#include <sys/vdev.h>
 
 /*
  * Needed to close a window in dnode_move() that allows the objset to be freed
  * before it can be safely accessed.
  */
 krwlock_t os_lock;
 
+/*
+ * Tunable to override the maximum number of threads used to parallelize
+ * dmu_objset_find_dp(), needed to speed up the import of pools with many
+ * datasets.
+ * Default is 4 times the number of leaf vdevs.
+ */
+int dmu_find_threads = 0;
+
+static void dmu_objset_find_dp_cb(void *arg);
+
 void
 dmu_objset_init(void)
 {
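The dmu_find_threads tunable added above is read once per dmu_objset_find_dp() call; leaving it at 0 keeps the default of four tasks per leaf vdev (see the ntasks computation in the final hunk). On illumos a kernel tunable like this would typically be set from /etc/system; a hypothetical example (the value 16 is arbitrary):

	set zfs:dmu_find_threads = 16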
@@ -504,6 +516,25 @@ dmu_objset_hold(const char *name, void *tag, objset_t **osp)
 	return (err);
 }
 
+static int
+dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
+    boolean_t readonly, void *tag, objset_t **osp)
+{
+	int err;
+
+	err = dmu_objset_from_ds(ds, osp);
+	if (err != 0) {
+		dsl_dataset_disown(ds, tag);
+	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
+		dsl_dataset_disown(ds, tag);
+		return (SET_ERROR(EINVAL));
+	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
+		dsl_dataset_disown(ds, tag);
+		return (SET_ERROR(EROFS));
+	}
+	return (err);
+}
+
 /*
  * dsl_pool must not be held when this is called.
  * Upon successful return, there will be a longhold on the dataset,
@@ -525,21 +556,26 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
 		dsl_pool_rele(dp, FTAG);
 		return (err);
 	}
-
-	err = dmu_objset_from_ds(ds, osp);
+	err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
 	dsl_pool_rele(dp, FTAG);
-	if (err != 0) {
-		dsl_dataset_disown(ds, tag);
-	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
-		dsl_dataset_disown(ds, tag);
-		return (SET_ERROR(EINVAL));
-	} else if (!readonly && ds->ds_is_snapshot) {
-		dsl_dataset_disown(ds, tag);
-		return (SET_ERROR(EROFS));
-	}
+
 	return (err);
 }
 
+int
+dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
+    boolean_t readonly, void *tag, objset_t **osp)
+{
+	dsl_dataset_t *ds;
+	int err;
+
+	err = dsl_dataset_own_obj(dp, obj, tag, &ds);
+	if (err != 0)
+		return (err);
+
+	return (dmu_objset_own_impl(ds, type, readonly, tag, osp));
+}
+
 void
 dmu_objset_rele(objset_t *os, void *tag)
 {
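The new dmu_objset_own_obj() entry point lets a caller that already has a dsl_pool_t and an object number own an objset directly, sharing the type and read-only checks with dmu_objset_own() through the dmu_objset_own_impl() helper. A hypothetical caller sketch (not part of this commit; assumes dp and obj are in scope):

	objset_t *os;
	int err;

	/* own the objset read-only, insisting it is a ZFS filesystem */
	err = dmu_objset_own_obj(dp, obj, DMU_OST_ZFS, B_TRUE, FTAG, &os);
	if (err == 0) {
		/* ... use os ... */
		dmu_objset_disown(os, FTAG);	/* drops the longhold */
	}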
@@ -1618,30 +1654,41 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name,
 	return (0);
 }
 
-/*
- * Find objsets under and including ddobj, call func(ds) on each.
- */
-int
-dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
-    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
+typedef struct dmu_objset_find_ctx {
+	taskq_t		*dc_tq;
+	dsl_pool_t	*dc_dp;
+	uint64_t	dc_ddobj;
+	int		(*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *);
+	void		*dc_arg;
+	int		dc_flags;
+	kmutex_t	*dc_error_lock;
+	int		*dc_error;
+} dmu_objset_find_ctx_t;
+
+static void
+dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
 {
+	dsl_pool_t *dp = dcp->dc_dp;
+	dmu_objset_find_ctx_t *child_dcp;
 	dsl_dir_t *dd;
 	dsl_dataset_t *ds;
 	zap_cursor_t zc;
 	zap_attribute_t *attr;
 	uint64_t thisobj;
-	int err;
+	int err = 0;
 
-	ASSERT(dsl_pool_config_held(dp));
+	/* don't process if there already was an error */
+	if (*dcp->dc_error != 0)
+		goto out;
 
-	err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
+	err = dsl_dir_hold_obj(dp, dcp->dc_ddobj, NULL, FTAG, &dd);
 	if (err != 0)
-		return (err);
+		goto out;
 
 	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
 	if (dd->dd_myname[0] == '$') {
 		dsl_dir_rele(dd, FTAG);
-		return (0);
+		goto out;
 	}
 
 	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
@@ -1650,7 +1697,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
 	/*
 	 * Iterate over all children.
 	 */
-	if (flags & DS_FIND_CHILDREN) {
+	if (dcp->dc_flags & DS_FIND_CHILDREN) {
 		for (zap_cursor_init(&zc, dp->dp_meta_objset,
 		    dsl_dir_phys(dd)->dd_child_dir_zapobj);
 		    zap_cursor_retrieve(&zc, attr) == 0;
@@ -1659,24 +1706,22 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
 			    sizeof (uint64_t));
 			ASSERT3U(attr->za_num_integers, ==, 1);
 
-			err = dmu_objset_find_dp(dp, attr->za_first_integer,
-			    func, arg, flags);
-			if (err != 0)
-				break;
+			child_dcp = kmem_alloc(sizeof (*child_dcp), KM_SLEEP);
+			*child_dcp = *dcp;
+			child_dcp->dc_ddobj = attr->za_first_integer;
+			if (dcp->dc_tq != NULL)
+				(void) taskq_dispatch(dcp->dc_tq,
+				    dmu_objset_find_dp_cb, child_dcp, TQ_SLEEP);
+			else
+				dmu_objset_find_dp_impl(child_dcp);
 		}
 		zap_cursor_fini(&zc);
-
-		if (err != 0) {
-			dsl_dir_rele(dd, FTAG);
-			kmem_free(attr, sizeof (zap_attribute_t));
-			return (err);
-		}
 	}
 
 	/*
 	 * Iterate over all snapshots.
 	 */
-	if (flags & DS_FIND_SNAPSHOTS) {
+	if (dcp->dc_flags & DS_FIND_SNAPSHOTS) {
 		dsl_dataset_t *ds;
 		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
 
@@ -1697,7 +1742,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
 			    attr->za_first_integer, FTAG, &ds);
 			if (err != 0)
 				break;
-			err = func(dp, ds, arg);
+			err = dcp->dc_func(dp, ds, dcp->dc_arg);
 			dsl_dataset_rele(ds, FTAG);
 			if (err != 0)
 				break;
@@ -1710,17 +1755,117 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
 	kmem_free(attr, sizeof (zap_attribute_t));
 
 	if (err != 0)
-		return (err);
+		goto out;
 
 	/*
 	 * Apply to self.
 	 */
 	err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
 	if (err != 0)
-		return (err);
-	err = func(dp, ds, arg);
+		goto out;
+	err = dcp->dc_func(dp, ds, dcp->dc_arg);
 	dsl_dataset_rele(ds, FTAG);
-	return (err);
+
+out:
+	if (err != 0) {
+		mutex_enter(dcp->dc_error_lock);
+		/* only keep first error */
+		if (*dcp->dc_error == 0)
+			*dcp->dc_error = err;
+		mutex_exit(dcp->dc_error_lock);
+	}
+
+	kmem_free(dcp, sizeof (*dcp));
+}
+
+static void
+dmu_objset_find_dp_cb(void *arg)
+{
+	dmu_objset_find_ctx_t *dcp = arg;
+	dsl_pool_t *dp = dcp->dc_dp;
+
+	dsl_pool_config_enter(dp, FTAG);
+
+	dmu_objset_find_dp_impl(dcp);
+
+	dsl_pool_config_exit(dp, FTAG);
+}
+
+/*
+ * Find objsets under and including ddobj, call func(ds) on each.
+ * The order of enumeration is completely undefined.
+ * func is called with dsl_pool_config held.
+ */
+int
+dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
+    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
+{
+	int error = 0;
+	taskq_t *tq = NULL;
+	int ntasks;
+	dmu_objset_find_ctx_t *dcp;
+	kmutex_t err_lock;
+
+	mutex_init(&err_lock, NULL, MUTEX_DEFAULT, NULL);
+	dcp = kmem_alloc(sizeof (*dcp), KM_SLEEP);
+	dcp->dc_tq = NULL;
+	dcp->dc_dp = dp;
+	dcp->dc_ddobj = ddobj;
+	dcp->dc_func = func;
+	dcp->dc_arg = arg;
+	dcp->dc_flags = flags;
+	dcp->dc_error_lock = &err_lock;
+	dcp->dc_error = &error;
+
+	if ((flags & DS_FIND_SERIALIZE) || dsl_pool_config_held_writer(dp)) {
+		/*
+		 * In case a write lock is held we can't make use of
+		 * parallelism, as down the stack of the worker threads
+		 * the lock is asserted via dsl_pool_config_held.
+		 * In case of a read lock this is solved by getting a read
+		 * lock in each worker thread, which isn't possible in case
+		 * of a writer lock. So we fall back to the synchronous path
+		 * here.
+		 * In the future it might be possible to get some magic into
+		 * dsl_pool_config_held in a way that it returns true for
+		 * the worker threads so that a single lock held from this
+		 * thread suffices. For now, stay single threaded.
+		 */
+		dmu_objset_find_dp_impl(dcp);
+		mutex_destroy(&err_lock);
+
+		return (error);
+	}
+
+	ntasks = dmu_find_threads;
+	if (ntasks == 0)
+		ntasks = vdev_count_leaves(dp->dp_spa) * 4;
+	tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks,
+	    INT_MAX, 0);
+	if (tq == NULL) {
+		kmem_free(dcp, sizeof (*dcp));
+		mutex_destroy(&err_lock);
+		return (SET_ERROR(ENOMEM));
+	}
+	dcp->dc_tq = tq;
+
+	/* dcp will be freed by task */
+	(void) taskq_dispatch(tq, dmu_objset_find_dp_cb, dcp, TQ_SLEEP);
+
+	/*
+	 * PORTING: this code relies on the property of taskq_wait to wait
+	 * until no more tasks are queued and no more tasks are active. As
+	 * we always queue new tasks from within other tasks, taskq_wait
+	 * reliably waits for the full recursion to finish, even though we
+	 * enqueue new tasks after taskq_wait has been called.
+	 * On platforms other than illumos, taskq_wait may not have this
+	 * property.
+	 */
+	taskq_wait(tq);
+	taskq_destroy(tq);
+	mutex_destroy(&err_lock);
+
+	return (error);
 }
 
 /*
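Note the behavioral change for callers of dmu_objset_find_dp(): func can now be invoked concurrently from several taskq threads, each holding the pool config lock as reader, so callbacks must be thread-safe. A hypothetical caller sketch (not from this commit), counting all datasets and snapshots under the pool's root dsl_dir; the shared counter is updated atomically because the callback may run in parallel:

	/* callback: runs with dsl_pool_config held, possibly in parallel */
	static int
	count_ds_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
	{
		atomic_inc_64((uint64_t *)arg);
		return (0);
	}

	/* in the caller, taking the config lock as existing callers do */
	uint64_t count = 0;
	int err;

	dsl_pool_config_enter(dp, FTAG);
	err = dmu_objset_find_dp(dp, dp->dp_root_dir_obj, count_ds_cb, &count,
	    DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
	dsl_pool_config_exit(dp, FTAG);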
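The PORTING comment is the load-bearing assumption of the whole scheme: taskq_wait() must also wait for tasks dispatched after the wait begins, since every visit can enqueue further visits into the same taskq. A minimal self-contained sketch of that dispatch/wait shape, with hypothetical names (visit_arg_t, visit_cb, visit_tree), walking an implicit binary tree of nnodes nodes (nnodes >= 1 assumed):

	typedef struct visit_arg {
		taskq_t		*va_tq;		/* shared taskq; tasks re-dispatch into it */
		uint64_t	va_node;	/* node id; children are 2n+1 and 2n+2 */
		uint64_t	va_nnodes;	/* total node count */
	} visit_arg_t;

	static void
	visit_cb(void *arg)
	{
		visit_arg_t *va = arg, *cva;
		uint64_t child;
		int i;

		/* ... per-node work on va->va_node goes here ... */

		for (i = 1; i <= 2; i++) {
			child = 2 * va->va_node + i;
			if (child >= va->va_nnodes)
				continue;
			cva = kmem_alloc(sizeof (*cva), KM_SLEEP);
			*cva = *va;
			cva->va_node = child;
			/* recursion: a task enqueues more tasks into its own taskq */
			(void) taskq_dispatch(va->va_tq, visit_cb, cva, TQ_SLEEP);
		}
		kmem_free(va, sizeof (*va));
	}

	static void
	visit_tree(uint64_t nnodes)
	{
		taskq_t *tq = taskq_create("visit", 8, minclsyspri, 8, INT_MAX, 0);
		visit_arg_t *va = kmem_alloc(sizeof (*va), KM_SLEEP);

		va->va_tq = tq;
		va->va_node = 0;
		va->va_nnodes = nnodes;
		(void) taskq_dispatch(tq, visit_cb, va, TQ_SLEEP);

		/*
		 * On illumos this returns only once the queue has fully
		 * drained, including tasks dispatched after the call began.
		 */
		taskq_wait(tq);
		taskq_destroy(tq);
	}

On a platform where taskq_wait() only waits for work dispatched before the call, this pattern would need an explicit in-flight counter (or per-level waits) instead.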