Skip to content

Commit

Permalink
nilfs2: fix lockdep warnings in page operations for btree nodes
Browse files Browse the repository at this point in the history
[ Upstream commit e897be1 ]

Patch series "nilfs2 lockdep warning fixes".

The first two are to resolve the lockdep warning issue, and the last one
is the accompanying cleanup and low priority.

Based on your comment, this series solves the issue by separating inode
object as needed.  Since I was worried about the impact of the object
composition changes, I tested the series carefully not to cause
regressions especially for delicate functions such like disk space
reclamation and snapshots.

This patch (of 3):

If CONFIG_LOCKDEP is enabled, nilfs2 hits lockdep warnings at
inode_to_wb() during page/folio operations for btree nodes:

  WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 inode_to_wb include/linux/backing-dev.h:269 [inline]
  WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 folio_account_dirtied mm/page-writeback.c:2460 [inline]
  WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 __folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509
  Modules linked in:
  ...
  RIP: 0010:inode_to_wb include/linux/backing-dev.h:269 [inline]
  RIP: 0010:folio_account_dirtied mm/page-writeback.c:2460 [inline]
  RIP: 0010:__folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509
  ...
  Call Trace:
    __set_page_dirty include/linux/pagemap.h:834 [inline]
    mark_buffer_dirty+0x4e6/0x650 fs/buffer.c:1145
    nilfs_btree_propagate_p fs/nilfs2/btree.c:1889 [inline]
    nilfs_btree_propagate+0x4ae/0xea0 fs/nilfs2/btree.c:2085
    nilfs_bmap_propagate+0x73/0x170 fs/nilfs2/bmap.c:337
    nilfs_collect_dat_data+0x45/0xd0 fs/nilfs2/segment.c:625
    nilfs_segctor_apply_buffers+0x14a/0x470 fs/nilfs2/segment.c:1009
    nilfs_segctor_scan_file+0x47a/0x700 fs/nilfs2/segment.c:1048
    nilfs_segctor_collect_blocks fs/nilfs2/segment.c:1224 [inline]
    nilfs_segctor_collect fs/nilfs2/segment.c:1494 [inline]
    nilfs_segctor_do_construct+0x14f3/0x6c60 fs/nilfs2/segment.c:2036
    nilfs_segctor_construct+0x7a7/0xb30 fs/nilfs2/segment.c:2372
    nilfs_segctor_thread_construct fs/nilfs2/segment.c:2480 [inline]
    nilfs_segctor_thread+0x3c3/0xf90 fs/nilfs2/segment.c:2563
    kthread+0x405/0x4f0 kernel/kthread.c:327
    ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295

This is because nilfs2 uses two page caches for each inode and
inode->i_mapping never points to one of them, the btree node cache.

This causes inode_to_wb(inode) to refer to a different page cache than
the caller page/folio operations such like __folio_start_writeback(),
__folio_end_writeback(), or __folio_mark_dirty() acquired the lock.

This patch resolves the issue by allocating and using an additional
inode to hold the page cache of btree nodes.  The inode is attached
one-to-one to the traditional nilfs2 inode if it requires a block
mapping with b-tree.  This setup change is in memory only and does not
affect the disk format.

Link: https://lkml.kernel.org/r/1647867427-30498-1-git-send-email-konishi.ryusuke@gmail.com
Link: https://lkml.kernel.org/r/1647867427-30498-2-git-send-email-konishi.ryusuke@gmail.com
Link: https://lore.kernel.org/r/YXrYvIo8YRnAOJCj@casper.infradead.org
Link: https://lore.kernel.org/r/9a20b33d-b38f-b4a2-4742-c1eb5b8e4d6c@redhat.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Reported-by: syzbot+0d5b462a6f07447991b3@syzkaller.appspotmail.com
Reported-by: syzbot+34ef28bb2aeb28724aa0@syzkaller.appspotmail.com
Reported-by: Hao Sun <sunhao.th@gmail.com>
Reported-by: David Hildenbrand <david@redhat.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
konis authored and gregkh committed May 25, 2022
1 parent aca18ba commit d626fcd
Show file tree
Hide file tree
Showing 10 changed files with 154 additions and 50 deletions.
23 changes: 20 additions & 3 deletions fs/nilfs2/btnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,23 @@
#include "page.h"
#include "btnode.h"


/**
* nilfs_init_btnc_inode - initialize B-tree node cache inode
* @btnc_inode: inode to be initialized
*
* nilfs_init_btnc_inode() sets up an inode for B-tree node cache.
*/
void nilfs_init_btnc_inode(struct inode *btnc_inode)
{
struct nilfs_inode_info *ii = NILFS_I(btnc_inode);

btnc_inode->i_mode = S_IFREG;
ii->i_flags = 0;
memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap));
mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS);
}

void nilfs_btnode_cache_clear(struct address_space *btnc)
{
invalidate_mapping_pages(btnc, 0, -1);
Expand All @@ -29,7 +46,7 @@ void nilfs_btnode_cache_clear(struct address_space *btnc)
struct buffer_head *
nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
{
struct inode *inode = NILFS_BTNC_I(btnc);
struct inode *inode = btnc->host;
struct buffer_head *bh;

bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node));
Expand Down Expand Up @@ -57,7 +74,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
struct buffer_head **pbh, sector_t *submit_ptr)
{
struct buffer_head *bh;
struct inode *inode = NILFS_BTNC_I(btnc);
struct inode *inode = btnc->host;
struct page *page;
int err;

Expand Down Expand Up @@ -157,7 +174,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
struct nilfs_btnode_chkey_ctxt *ctxt)
{
struct buffer_head *obh, *nbh;
struct inode *inode = NILFS_BTNC_I(btnc);
struct inode *inode = btnc->host;
__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
int err;

Expand Down
1 change: 1 addition & 0 deletions fs/nilfs2/btnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ struct nilfs_btnode_chkey_ctxt {
struct buffer_head *newbh;
};

void nilfs_init_btnc_inode(struct inode *btnc_inode);
void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
__u64 blocknr);
Expand Down
27 changes: 19 additions & 8 deletions fs/nilfs2/btree.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path)
static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
__u64 ptr, struct buffer_head **bhp)
{
struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
struct address_space *btnc = btnc_inode->i_mapping;
struct buffer_head *bh;

bh = nilfs_btnode_create_block(btnc, ptr);
Expand Down Expand Up @@ -470,7 +471,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
struct buffer_head **bhp,
const struct nilfs_btree_readahead_info *ra)
{
struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
struct address_space *btnc = btnc_inode->i_mapping;
struct buffer_head *bh, *ra_bh;
sector_t submit_ptr = 0;
int ret;
Expand Down Expand Up @@ -1742,6 +1744,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
dat = nilfs_bmap_get_dat(btree);
}

ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
if (ret < 0)
return ret;

ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
if (ret < 0)
return ret;
Expand Down Expand Up @@ -1914,7 +1920,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
path[level].bp_ctxt.bh = path[level].bp_bh;
ret = nilfs_btnode_prepare_change_key(
&NILFS_BMAP_I(btree)->i_btnode_cache,
NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
&path[level].bp_ctxt);
if (ret < 0) {
nilfs_dat_abort_update(dat,
Expand All @@ -1940,7 +1946,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,

if (buffer_nilfs_node(path[level].bp_bh)) {
nilfs_btnode_commit_change_key(
&NILFS_BMAP_I(btree)->i_btnode_cache,
NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
&path[level].bp_ctxt);
path[level].bp_bh = path[level].bp_ctxt.bh;
}
Expand All @@ -1959,7 +1965,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
&path[level].bp_newreq.bpr_req);
if (buffer_nilfs_node(path[level].bp_bh))
nilfs_btnode_abort_change_key(
&NILFS_BMAP_I(btree)->i_btnode_cache,
NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
&path[level].bp_ctxt);
}

Expand Down Expand Up @@ -2135,7 +2141,8 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
struct list_head *listp)
{
struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
struct address_space *btcache = btnc_inode->i_mapping;
struct list_head lists[NILFS_BTREE_LEVEL_MAX];
struct pagevec pvec;
struct buffer_head *bh, *head;
Expand Down Expand Up @@ -2189,12 +2196,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
path[level].bp_ctxt.newkey = blocknr;
path[level].bp_ctxt.bh = *bh;
ret = nilfs_btnode_prepare_change_key(
&NILFS_BMAP_I(btree)->i_btnode_cache,
NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
&path[level].bp_ctxt);
if (ret < 0)
return ret;
nilfs_btnode_commit_change_key(
&NILFS_BMAP_I(btree)->i_btnode_cache,
NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
&path[level].bp_ctxt);
*bh = path[level].bp_ctxt.bh;
}
Expand Down Expand Up @@ -2399,6 +2406,10 @@ int nilfs_btree_init(struct nilfs_bmap *bmap)

if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
ret = -EIO;
else
ret = nilfs_attach_btree_node_cache(
&NILFS_BMAP_I(bmap)->vfs_inode);

return ret;
}

Expand Down
7 changes: 4 additions & 3 deletions fs/nilfs2/gcinode.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
__u64 vbn, struct buffer_head **out_bh)
{
struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode;
int ret;

ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
ret = nilfs_btnode_submit_block(btnc_inode->i_mapping,
vbn ? : pbn, pbn, REQ_OP_READ, 0,
out_bh, &pbn);
if (ret == -EEXIST) /* internal code (cache hit) */
Expand Down Expand Up @@ -170,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode)
ii->i_flags = 0;
nilfs_bmap_init_gc(ii->i_bmap);

return 0;
return nilfs_attach_btree_node_cache(inode);
}

/**
Expand All @@ -185,7 +186,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
ii = list_first_entry(head, struct nilfs_inode_info, i_dirty);
list_del_init(&ii->i_dirty);
truncate_inode_pages(&ii->vfs_inode.i_data, 0);
nilfs_btnode_cache_clear(&ii->i_btnode_cache);
nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping);
iput(&ii->vfs_inode);
}
}
104 changes: 90 additions & 14 deletions fs/nilfs2/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@
* @cno: checkpoint number
* @root: pointer on NILFS root object (mounted checkpoint)
* @for_gc: inode for GC flag
* @for_btnc: inode for B-tree node cache flag
*/
struct nilfs_iget_args {
u64 ino;
__u64 cno;
struct nilfs_root *root;
int for_gc;
bool for_gc;
bool for_btnc;
};

static int nilfs_iget_test(struct inode *inode, void *opaque);
Expand Down Expand Up @@ -314,7 +316,8 @@ static int nilfs_insert_inode_locked(struct inode *inode,
unsigned long ino)
{
struct nilfs_iget_args args = {
.ino = ino, .root = root, .cno = 0, .for_gc = 0
.ino = ino, .root = root, .cno = 0, .for_gc = false,
.for_btnc = false
};

return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
Expand Down Expand Up @@ -527,6 +530,13 @@ static int nilfs_iget_test(struct inode *inode, void *opaque)
return 0;

ii = NILFS_I(inode);
if (test_bit(NILFS_I_BTNC, &ii->i_state)) {
if (!args->for_btnc)
return 0;
} else if (args->for_btnc) {
return 0;
}

if (!test_bit(NILFS_I_GCINODE, &ii->i_state))
return !args->for_gc;

Expand All @@ -538,23 +548,24 @@ static int nilfs_iget_set(struct inode *inode, void *opaque)
struct nilfs_iget_args *args = opaque;

inode->i_ino = args->ino;
if (args->for_gc) {
NILFS_I(inode)->i_cno = args->cno;
NILFS_I(inode)->i_root = args->root;
if (args->root && args->ino == NILFS_ROOT_INO)
nilfs_get_root(args->root);

if (args->for_gc)
NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE);
NILFS_I(inode)->i_cno = args->cno;
NILFS_I(inode)->i_root = NULL;
} else {
if (args->root && args->ino == NILFS_ROOT_INO)
nilfs_get_root(args->root);
NILFS_I(inode)->i_root = args->root;
}
if (args->for_btnc)
NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC);
return 0;
}

struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
unsigned long ino)
{
struct nilfs_iget_args args = {
.ino = ino, .root = root, .cno = 0, .for_gc = 0
.ino = ino, .root = root, .cno = 0, .for_gc = false,
.for_btnc = false
};

return ilookup5(sb, ino, nilfs_iget_test, &args);
Expand All @@ -564,7 +575,8 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
unsigned long ino)
{
struct nilfs_iget_args args = {
.ino = ino, .root = root, .cno = 0, .for_gc = 0
.ino = ino, .root = root, .cno = 0, .for_gc = false,
.for_btnc = false
};

return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
Expand Down Expand Up @@ -595,7 +607,8 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
__u64 cno)
{
struct nilfs_iget_args args = {
.ino = ino, .root = NULL, .cno = cno, .for_gc = 1
.ino = ino, .root = NULL, .cno = cno, .for_gc = true,
.for_btnc = false
};
struct inode *inode;
int err;
Expand All @@ -615,6 +628,68 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
return inode;
}

/**
* nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
* @inode: inode object
*
* nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
* or does nothing if the inode already has it. This function allocates
* an additional inode to maintain page cache of B-tree nodes one-on-one.
*
* Return Value: On success, 0 is returned. On errors, one of the following
* negative error code is returned.
*
* %-ENOMEM - Insufficient memory available.
*/
int nilfs_attach_btree_node_cache(struct inode *inode)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
struct inode *btnc_inode;
struct nilfs_iget_args args;

if (ii->i_assoc_inode)
return 0;

args.ino = inode->i_ino;
args.root = ii->i_root;
args.cno = ii->i_cno;
args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0;
args.for_btnc = true;

btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
nilfs_iget_set, &args);
if (unlikely(!btnc_inode))
return -ENOMEM;
if (btnc_inode->i_state & I_NEW) {
nilfs_init_btnc_inode(btnc_inode);
unlock_new_inode(btnc_inode);
}
NILFS_I(btnc_inode)->i_assoc_inode = inode;
NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
ii->i_assoc_inode = btnc_inode;

return 0;
}

/**
* nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
* @inode: inode object
*
* nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
* holder inode bound to @inode, or does nothing if @inode doesn't have it.
*/
void nilfs_detach_btree_node_cache(struct inode *inode)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
struct inode *btnc_inode = ii->i_assoc_inode;

if (btnc_inode) {
NILFS_I(btnc_inode)->i_assoc_inode = NULL;
ii->i_assoc_inode = NULL;
iput(btnc_inode);
}
}

void nilfs_write_inode_common(struct inode *inode,
struct nilfs_inode *raw_inode, int has_bmap)
{
Expand Down Expand Up @@ -762,7 +837,8 @@ static void nilfs_clear_inode(struct inode *inode)
if (test_bit(NILFS_I_BMAP, &ii->i_state))
nilfs_bmap_clear(ii->i_bmap);

nilfs_btnode_cache_clear(&ii->i_btnode_cache);
if (!test_bit(NILFS_I_BTNC, &ii->i_state))
nilfs_detach_btree_node_cache(inode);

if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
nilfs_put_root(ii->i_root);
Expand Down
7 changes: 4 additions & 3 deletions fs/nilfs2/mdt.c
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode)
goto out;

ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes,
&ii->i_btnode_cache);
ii->i_assoc_inode->i_mapping);
if (ret)
goto out;

Expand Down Expand Up @@ -622,8 +622,9 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode)
nilfs_clear_dirty_pages(inode->i_mapping, true);
nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data);

nilfs_clear_dirty_pages(&ii->i_btnode_cache, true);
nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes);
nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true);
nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping,
&shadow->frozen_btnodes);

nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store);

Expand Down

0 comments on commit d626fcd

Please sign in to comment.