diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0bbf78cc5f75e..4fd242acec3cc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9809,10 +9809,6 @@ static int start_delalloc_inodes(struct btrfs_root *root, &work->work); } else { ret = sync_inode(inode, wbc); - if (!ret && - test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, - &BTRFS_I(inode)->runtime_flags)) - ret = sync_inode(inode, wbc); btrfs_add_delayed_iput(inode); if (ret || wbc->nr_to_write <= 0) goto out; diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 87f258600acc9..facd1c6876f03 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -532,9 +532,49 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, while ((delalloc_bytes || ordered_bytes) && loops < 3) { u64 temp = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; long nr_pages = min_t(u64, temp, LONG_MAX); + int async_pages; btrfs_start_delalloc_roots(fs_info, nr_pages, true); + /* + * We need to make sure any outstanding async pages are now + * processed before we continue. This is because things like + * sync_inode() try to be smart and skip writing if the inode is + * marked clean. We don't use filemap_fwrite for flushing + * because we want to control how many pages we write out at a + * time, thus this is the only safe way to make sure we've + * waited for outstanding compressed workers to have started + * their jobs and thus have ordered extents set up properly. + * + * This exists because we do not want to wait for each + * individual inode to finish its async work, we simply want to + * start the IO on everybody, and then come back here and wait + * for all of the async work to catch up. Once we're done with + * that we know we'll have ordered extents for everything and we + * can decide if we wait for that or not. + * + * If we choose to replace this in the future, make absolutely + * sure that the proper waiting is being done in the async case, + * as there have been bugs in that area before. + */ + async_pages = atomic_read(&fs_info->async_delalloc_pages); + if (!async_pages) + goto skip_async; + + /* + * We don't want to wait forever, if we wrote less pages in this + * loop than we have outstanding, only wait for that number of + * pages, otherwise we can wait for all async pages to finish + * before continuing. + */ + if (async_pages > nr_pages) + async_pages -= nr_pages; + else + async_pages = 0; + wait_event(fs_info->async_submit_wait, + atomic_read(&fs_info->async_delalloc_pages) <= + async_pages); +skip_async: loops++; if (wait_ordered && !trans) { btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);