Skip to content

Commit 672692c

Browse files
grwilson authored and behlendorf committed
Illumos 4754, 4755
4754 io issued to near-full luns even after setting noalloc threshold
4755 mg_alloc_failures is no longer needed

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/4754
  https://www.illumos.org/issues/4755
  illumos/illumos-gate@b6240e8

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2533
1 parent 9bd274d commit 672692c

File tree

3 files changed

+7
-52
lines changed

3 files changed

+7
-52
lines changed

include/sys/metaslab_impl.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
*/
2525

2626
/*
27-
* Copyright (c) 2013 by Delphix. All rights reserved.
27+
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
2828
*/
2929

3030
#ifndef _SYS_METASLAB_IMPL_H
@@ -58,7 +58,6 @@ struct metaslab_group {
5858
kmutex_t mg_lock;
5959
avl_tree_t mg_metaslab_tree;
6060
uint64_t mg_aliquot;
61-
uint64_t mg_alloc_failures;
6261
boolean_t mg_allocatable; /* can we allocate? */
6362
uint64_t mg_free_capacity; /* percentage free */
6463
int64_t mg_bias;

module/zfs/metaslab.c

Lines changed: 6 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
* avoid having to load lots of space_maps in a given txg. There are,
4141
* however, some cases where we want to avoid "fast" ganging and instead
4242
* we want to do an exhaustive search of all metaslabs on this device.
43-
* Currently we don't allow any gang, zil, or dump device related allocations
43+
* Currently we don't allow any gang, slog, or dump device related allocations
4444
* to "fast" gang.
4545
*/
4646
#define CAN_FASTGANG(flags) \
@@ -63,14 +63,6 @@ uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
6363
*/
6464
int zfs_condense_pct = 200;
6565

66-
/*
67-
* This value defines the number of allowed allocation failures per vdev.
68-
* If a device reaches this threshold in a given txg then we consider skipping
69-
* allocations on that device. The value of zfs_mg_alloc_failures is computed
70-
* in zio_init() unless it has been overridden in /etc/system.
71-
*/
72-
int zfs_mg_alloc_failures = 0;
73-
7466
/*
7567
* The zfs_mg_noalloc_threshold defines which metaslab groups should
7668
* be eligible for allocation. The value is defined as a percentage of
@@ -1660,10 +1652,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
16601652
void
16611653
metaslab_sync_reassess(metaslab_group_t *mg)
16621654
{
1663-
int64_t failures = mg->mg_alloc_failures;
1664-
16651655
metaslab_group_alloc_update(mg);
1666-
atomic_add_64(&mg->mg_alloc_failures, -failures);
16671656

16681657
/*
16691658
* Preload the next potential metaslabs
@@ -1690,7 +1679,7 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
16901679

16911680
static uint64_t
16921681
metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
1693-
uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags)
1682+
uint64_t txg, uint64_t min_distance, dva_t *dva, int d)
16941683
{
16951684
spa_t *spa = mg->mg_vd->vdev_spa;
16961685
metaslab_t *msp = NULL;
@@ -1717,10 +1706,9 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
17171706
spa_dbgmsg(spa, "%s: failed to meet weight "
17181707
"requirement: vdev %llu, txg %llu, mg %p, "
17191708
"msp %p, psize %llu, asize %llu, "
1720-
"failures %llu, weight %llu",
1721-
spa_name(spa), mg->mg_vd->vdev_id, txg,
1722-
mg, msp, psize, asize,
1723-
mg->mg_alloc_failures, msp->ms_weight);
1709+
"weight %llu", spa_name(spa),
1710+
mg->mg_vd->vdev_id, txg,
1711+
mg, msp, psize, asize, msp->ms_weight);
17241712
mutex_exit(&mg->mg_lock);
17251713
return (-1ULL);
17261714
}
@@ -1752,27 +1740,6 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
17521740

17531741
mutex_enter(&msp->ms_lock);
17541742

1755-
/*
1756-
* If we've already reached the allowable number of failed
1757-
* allocation attempts on this metaslab group then we
1758-
* consider skipping it. We skip it only if we're allowed
1759-
* to "fast" gang, the physical size is larger than
1760-
* a gang block, and we're attempting to allocate from
1761-
* the primary metaslab.
1762-
*/
1763-
if (mg->mg_alloc_failures > zfs_mg_alloc_failures &&
1764-
CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE &&
1765-
activation_weight == METASLAB_WEIGHT_PRIMARY) {
1766-
spa_dbgmsg(spa, "%s: skipping metaslab group: "
1767-
"vdev %llu, txg %llu, mg %p, msp[%llu] %p, "
1768-
"psize %llu, asize %llu, failures %llu",
1769-
spa_name(spa), mg->mg_vd->vdev_id, txg, mg,
1770-
msp->ms_id, msp, psize, asize,
1771-
mg->mg_alloc_failures);
1772-
mutex_exit(&msp->ms_lock);
1773-
return (-1ULL);
1774-
}
1775-
17761743
/*
17771744
* Ensure that the metaslab we have selected is still
17781745
* capable of handling our request. It's possible that
@@ -1812,8 +1779,6 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
18121779
if ((offset = metaslab_block_alloc(msp, asize)) != -1ULL)
18131780
break;
18141781

1815-
atomic_inc_64(&mg->mg_alloc_failures);
1816-
18171782
metaslab_passivate(msp, metaslab_block_maxsize(msp));
18181783
mutex_exit(&msp->ms_lock);
18191784
}
@@ -1980,7 +1945,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
19801945
ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
19811946

19821947
offset = metaslab_group_alloc(mg, psize, asize, txg, distance,
1983-
dva, d, flags);
1948+
dva, d);
19841949
if (offset != -1ULL) {
19851950
/*
19861951
* If we've just selected this metaslab group,

module/zfs/zio.c

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -60,8 +60,6 @@ kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
6060
int zio_bulk_flags = 0;
6161
int zio_delay_max = ZIO_DELAY_MAX;
6262

63-
extern int zfs_mg_alloc_failures;
64-
6563
/*
6664
* The following actions directly effect the spa's sync-to-convergence logic.
6765
* The values below define the sync pass when we start performing the action.
@@ -193,13 +191,6 @@ zio_init(void)
193191
zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
194192
}
195193

196-
/*
197-
* The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs
198-
* to fail 3 times per txg or 8 failures, whichever is greater.
199-
*/
200-
if (zfs_mg_alloc_failures == 0)
201-
zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
202-
203194
zio_inject_init();
204195

205196
lz4_init();

0 commit comments

Comments
 (0)