Skip to content

Commit

Permalink
ext4: fix deadlock due to mbcache entry corruption
Browse files Browse the repository at this point in the history
[ Upstream commit a44e84a ]

When manipulating xattr blocks, we can deadlock infinitely looping
inside ext4_xattr_block_set() where we constantly keep finding xattr
block for reuse in mbcache but we are unable to reuse it because its
reference count is too big. This happens because cache entry for the
xattr block is marked as reusable (e_reusable set) although its
reference count is too big. When this inconsistency happens, this
inconsistent state is kept indefinitely and so ext4_xattr_block_set()
keeps retrying indefinitely.

The inconsistent state is caused by non-atomic update of e_reusable bit.
e_reusable is part of a bitfield and e_reusable update can race with
update of e_referenced bit in the same bitfield resulting in loss of one
of the updates. Fix the problem by using atomic bitops instead.

This bug has been around for many years, but it became *much* easier
to hit after commit 65f8b80 ("ext4: fix race when reusing xattr
blocks").

Cc: stable@vger.kernel.org
Fixes: 6048c64 ("mbcache: add reusable flag to cache entries")
Fixes: 65f8b80 ("ext4: fix race when reusing xattr blocks")
Reported-and-tested-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
Reported-by: Thilo Fromm <t-lo@linux.microsoft.com>
Link: https://lore.kernel.org/r/c77bf00f-4618-7149-56f1-b8d1664b9d07@linux.microsoft.com/
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Link: https://lore.kernel.org/r/20221123193950.16758-1-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
jankara authored and gregkh committed Jan 12, 2023
1 parent a6e4094 commit 5bc0b2f
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 10 deletions.
4 changes: 2 additions & 2 deletions fs/ext4/xattr.c
Expand Up @@ -1281,7 +1281,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ce = mb_cache_entry_get(ea_block_cache, hash,
bh->b_blocknr);
if (ce) {
ce->e_reusable = 1;
set_bit(MBE_REUSABLE_B, &ce->e_flags);
mb_cache_entry_put(ea_block_cache, ce);
}
}
Expand Down Expand Up @@ -2045,7 +2045,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
}
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
if (ref == EXT4_XATTR_REFCOUNT_MAX)
ce->e_reusable = 0;
clear_bit(MBE_REUSABLE_B, &ce->e_flags);
ea_bdebug(new_bh, "reusing; refcount now=%d",
ref);
ext4_xattr_block_csum_set(inode, new_bh);
Expand Down
14 changes: 8 additions & 6 deletions fs/mbcache.c
Expand Up @@ -94,8 +94,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
atomic_set(&entry->e_refcnt, 1);
entry->e_key = key;
entry->e_value = value;
entry->e_reusable = reusable;
entry->e_referenced = 0;
entry->e_flags = 0;
if (reusable)
set_bit(MBE_REUSABLE_B, &entry->e_flags);
head = mb_cache_entry_head(cache, key);
hlist_bl_lock(head);
hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
Expand Down Expand Up @@ -162,7 +163,8 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
while (node) {
entry = hlist_bl_entry(node, struct mb_cache_entry,
e_hash_list);
if (entry->e_key == key && entry->e_reusable &&
if (entry->e_key == key &&
test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
atomic_inc_not_zero(&entry->e_refcnt))
goto out;
node = node->next;
Expand Down Expand Up @@ -318,7 +320,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
void mb_cache_entry_touch(struct mb_cache *cache,
struct mb_cache_entry *entry)
{
entry->e_referenced = 1;
set_bit(MBE_REFERENCED_B, &entry->e_flags);
}
EXPORT_SYMBOL(mb_cache_entry_touch);

Expand All @@ -343,9 +345,9 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
entry = list_first_entry(&cache->c_list,
struct mb_cache_entry, e_list);
/* Drop initial hash reference if there is no user */
if (entry->e_referenced ||
if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
entry->e_referenced = 0;
clear_bit(MBE_REFERENCED_B, &entry->e_flags);
list_move_tail(&entry->e_list, &cache->c_list);
continue;
}
Expand Down
9 changes: 7 additions & 2 deletions include/linux/mbcache.h
Expand Up @@ -10,6 +10,12 @@

struct mb_cache;

/* Cache entry flags */
enum {
MBE_REFERENCED_B = 0,
MBE_REUSABLE_B
};

struct mb_cache_entry {
/* List of entries in cache - protected by cache->c_list_lock */
struct list_head e_list;
Expand All @@ -26,8 +32,7 @@ struct mb_cache_entry {
atomic_t e_refcnt;
/* Key in hash - stable during lifetime of the entry */
u32 e_key;
u32 e_referenced:1;
u32 e_reusable:1;
unsigned long e_flags;
/* User provided value - stable during lifetime of the entry */
u64 e_value;
};
Expand Down

0 comments on commit 5bc0b2f

Please sign in to comment.