Skip to content

Commit

Permalink
ext4: make sure allocate pending entry not fail
Browse files Browse the repository at this point in the history
[ Upstream commit 8e387c8 ]

__insert_pending() allocate memory in atomic context, so the allocation
could fail, but we are not handling that failure now. It could lead
ext4_es_remove_extent() to get wrong reserved clusters, and the global
data blocks reservation count will be incorrect. The same to
extents_status entry preallocation, preallocate pending entry out of the
i_es_lock with __GFP_NOFAIL, make sure __insert_pending() and
__revise_pending() always succeeds.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20230824092619.1327976-3-yi.zhang@huaweicloud.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
zhangyi089 authored and gregkh committed Dec 3, 2023
1 parent 8384d8c commit d7eb376
Showing 1 changed file with 89 additions and 34 deletions.
123 changes: 89 additions & 34 deletions fs/ext4/extents_status.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,9 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
struct ext4_inode_info *locked_ei);
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len);
static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len,
struct pending_reservation **prealloc);

int __init ext4_init_es(void)
{
Expand Down Expand Up @@ -450,6 +451,19 @@ static void ext4_es_list_del(struct inode *inode)
spin_unlock(&sbi->s_es_lock);
}

static inline struct pending_reservation *__alloc_pending(bool nofail)
{
if (!nofail)
return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);

return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL);
}

static inline void __free_pending(struct pending_reservation *pr)
{
kmem_cache_free(ext4_pending_cachep, pr);
}

/*
* Returns true if we cannot fail to allocate memory for this extent_status
* entry and cannot reclaim it until its status changes.
Expand Down Expand Up @@ -840,11 +854,12 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
{
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
int err1 = 0;
int err2 = 0;
int err1 = 0, err2 = 0, err3 = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct extent_status *es1 = NULL;
struct extent_status *es2 = NULL;
struct pending_reservation *pr = NULL;
bool revise_pending = false;

if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
return 0;
Expand Down Expand Up @@ -872,11 +887,17 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,

ext4_es_insert_extent_check(inode, &newes);

revise_pending = sbi->s_cluster_ratio > 1 &&
test_opt(inode->i_sb, DELALLOC) &&
(status & (EXTENT_STATUS_WRITTEN |
EXTENT_STATUS_UNWRITTEN));
retry:
if (err1 && !es1)
es1 = __es_alloc_extent(true);
if ((err1 || err2) && !es2)
es2 = __es_alloc_extent(true);
if ((err1 || err2 || err3) && revise_pending && !pr)
pr = __alloc_pending(true);
write_lock(&EXT4_I(inode)->i_es_lock);

err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
Expand All @@ -901,13 +922,18 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
es2 = NULL;
}

if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
(status & EXTENT_STATUS_WRITTEN ||
status & EXTENT_STATUS_UNWRITTEN))
__revise_pending(inode, lblk, len);
if (revise_pending) {
err3 = __revise_pending(inode, lblk, len, &pr);
if (err3 != 0)
goto error;
if (pr) {
__free_pending(pr);
pr = NULL;
}
}
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err1 || err2)
if (err1 || err2 || err3)
goto retry;

ext4_es_print_tree(inode);
Expand Down Expand Up @@ -1315,7 +1341,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
rc->ndelonly--;
node = rb_next(&pr->rb_node);
rb_erase(&pr->rb_node, &tree->root);
kmem_cache_free(ext4_pending_cachep, pr);
__free_pending(pr);
if (!node)
break;
pr = rb_entry(node, struct pending_reservation,
Expand Down Expand Up @@ -1913,11 +1939,13 @@ static struct pending_reservation *__get_pending(struct inode *inode,
*
* @inode - file containing the cluster
* @lblk - logical block in the cluster to be added
* @prealloc - preallocated pending entry
*
* Returns 0 on successful insertion and -ENOMEM on failure. If the
* pending reservation is already in the set, returns successfully.
*/
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
struct pending_reservation **prealloc)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
Expand All @@ -1943,10 +1971,15 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
}
}

pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
if (pr == NULL) {
ret = -ENOMEM;
goto out;
if (likely(*prealloc == NULL)) {
pr = __alloc_pending(false);
if (!pr) {
ret = -ENOMEM;
goto out;
}
} else {
pr = *prealloc;
*prealloc = NULL;
}
pr->lclu = lclu;

Expand Down Expand Up @@ -1976,7 +2009,7 @@ static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
if (pr != NULL) {
tree = &EXT4_I(inode)->i_pending_tree;
rb_erase(&pr->rb_node, &tree->root);
kmem_cache_free(ext4_pending_cachep, pr);
__free_pending(pr);
}
}

Expand Down Expand Up @@ -2037,10 +2070,10 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
bool allocated)
{
struct extent_status newes;
int err1 = 0;
int err2 = 0;
int err1 = 0, err2 = 0, err3 = 0;
struct extent_status *es1 = NULL;
struct extent_status *es2 = NULL;
struct pending_reservation *pr = NULL;

if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
return 0;
Expand All @@ -2060,6 +2093,8 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
es1 = __es_alloc_extent(true);
if ((err1 || err2) && !es2)
es2 = __es_alloc_extent(true);
if ((err1 || err2 || err3) && allocated && !pr)
pr = __alloc_pending(true);
write_lock(&EXT4_I(inode)->i_es_lock);

err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
Expand All @@ -2082,11 +2117,18 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
es2 = NULL;
}

if (allocated)
__insert_pending(inode, lblk);
if (allocated) {
err3 = __insert_pending(inode, lblk, &pr);
if (err3 != 0)
goto error;
if (pr) {
__free_pending(pr);
pr = NULL;
}
}
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err1 || err2)
if (err1 || err2 || err3)
goto retry;

ext4_es_print_tree(inode);
Expand Down Expand Up @@ -2192,21 +2234,24 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
* @inode - file containing the range
* @lblk - logical block defining the start of range
* @len - length of range in blocks
* @prealloc - preallocated pending entry
*
* Used after a newly allocated extent is added to the extents status tree.
* Requires that the extents in the range have either written or unwritten
* status. Must be called while holding i_es_lock.
*/
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len)
static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len,
struct pending_reservation **prealloc)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t end = lblk + len - 1;
ext4_lblk_t first, last;
bool f_del = false, l_del = false;
int ret = 0;

if (len == 0)
return;
return 0;

/*
* Two cases - block range within single cluster and block range
Expand All @@ -2227,36 +2272,46 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
f_del = __es_scan_range(inode, &ext4_es_is_delonly,
first, lblk - 1);
if (f_del) {
__insert_pending(inode, first);
ret = __insert_pending(inode, first, prealloc);
if (ret < 0)
goto out;
} else {
last = EXT4_LBLK_CMASK(sbi, end) +
sbi->s_cluster_ratio - 1;
if (last != end)
l_del = __es_scan_range(inode,
&ext4_es_is_delonly,
end + 1, last);
if (l_del)
__insert_pending(inode, last);
else
if (l_del) {
ret = __insert_pending(inode, last, prealloc);
if (ret < 0)
goto out;
} else
__remove_pending(inode, last);
}
} else {
first = EXT4_LBLK_CMASK(sbi, lblk);
if (first != lblk)
f_del = __es_scan_range(inode, &ext4_es_is_delonly,
first, lblk - 1);
if (f_del)
__insert_pending(inode, first);
else
if (f_del) {
ret = __insert_pending(inode, first, prealloc);
if (ret < 0)
goto out;
} else
__remove_pending(inode, first);

last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
if (last != end)
l_del = __es_scan_range(inode, &ext4_es_is_delonly,
end + 1, last);
if (l_del)
__insert_pending(inode, last);
else
if (l_del) {
ret = __insert_pending(inode, last, prealloc);
if (ret < 0)
goto out;
} else
__remove_pending(inode, last);
}
out:
return ret;
}

0 comments on commit d7eb376

Please sign in to comment.