Skip to content

Commit

Permalink
ext4: main fast-commit commit path
Browse files Browse the repository at this point in the history
This patch adds the main commit path handlers for fast commit. The
overall patch can be divided into two inter-related parts:

(A) Metadata updates tracking

    This part consists of helper functions to track changes that need
    to be committed during a commit operation. These updates are
    maintained by Ext4 in different in-memory queues. Following are
    the APIs and their short description that are implemented in this
    patch:

    - ext4_fc_track_link/unlink/create() - Track link, unlink and create
      operations
    - ext4_fc_track_range() - Track changed logical block offsets of
      inodes
    - ext4_fc_track_inode() - Track inodes
    - ext4_fc_mark_ineligible() - Mark file system as fast commit
      ineligible
    - ext4_fc_start_update() / ext4_fc_stop_update() /
      ext4_fc_start_ineligible() / ext4_fc_stop_ineligible() - These
      functions are useful for co-ordinating inode updates with
      commits.

(B) Main commit Path

    This part consists of functions to convert updates tracked in
    in-memory data structures into on-disk commits. Function
    ext4_fc_commit() is the main entry point to the commit path.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/20201015203802.3597742-6-harshadshirwadkar@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
  • Loading branch information
harshadjs authored and tytso committed Oct 22, 2020
1 parent ff780b9 commit aa75f4d
Show file tree
Hide file tree
Showing 13 changed files with 1,707 additions and 29 deletions.
2 changes: 2 additions & 0 deletions fs/ext4/acl.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
ext4_fc_start_update(inode);

if ((type == ACL_TYPE_ACCESS) && acl) {
error = posix_acl_update_mode(inode, &mode, &acl);
Expand All @@ -259,6 +260,7 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
}
out_stop:
ext4_journal_stop(handle);
ext4_fc_stop_update(inode);
if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
return error;
Expand Down
70 changes: 70 additions & 0 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,31 @@ struct ext4_inode_info {

struct list_head i_orphan; /* unlinked but open inodes */

/* Fast commit related info */

struct list_head i_fc_list; /*
* inodes that need fast commit
* protected by sbi->s_fc_lock.
*/

/* Fast commit subtid when this inode was committed */
unsigned int i_fc_committed_subtid;

/* Start of lblk range that needs to be committed in this fast commit */
ext4_lblk_t i_fc_lblk_start;

/* Length of lblk range that needs to be committed in this fast commit */
ext4_lblk_t i_fc_lblk_len;

/* Number of ongoing updates on this inode */
atomic_t i_fc_updates;

/* Fast commit wait queue for this inode */
wait_queue_head_t i_fc_wait;

/* Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len */
struct mutex i_fc_lock;

/*
* i_disksize keeps track of what the inode size is ON DISK, not
* in memory. During truncate, i_size is set to the new size by
Expand Down Expand Up @@ -1141,6 +1166,10 @@ struct ext4_inode_info {
#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
#define EXT4_ERROR_FS 0x0002 /* Errors detected */
#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
#define EXT4_FC_INELIGIBLE 0x0008 /* Fast commit ineligible */
#define EXT4_FC_COMMITTING 0x0010 /* File system undergoing a fast
* commit.
*/

/*
* Misc. filesystem flags
Expand Down Expand Up @@ -1613,6 +1642,30 @@ struct ext4_sb_info {
/* Record the errseq of the backing block device */
errseq_t s_bdev_wb_err;
spinlock_t s_bdev_wb_lock;

/* Ext4 fast commit stuff */
atomic_t s_fc_subtid;
atomic_t s_fc_ineligible_updates;
/*
* After commit starts, the main queue gets locked, and the further
* updates get added in the staging queue.
*/
#define FC_Q_MAIN 0
#define FC_Q_STAGING 1
struct list_head s_fc_q[2]; /* Inodes staged for fast commit
* that have data changes in them.
*/
struct list_head s_fc_dentry_q[2]; /* directory entry updates */
unsigned int s_fc_bytes;
/*
* Main fast commit lock. This lock protects accesses to the
* following fields:
* ei->i_fc_list, s_fc_dentry_q, s_fc_q, s_fc_bytes, s_fc_bh.
*/
spinlock_t s_fc_lock;
struct buffer_head *s_fc_bh;
struct ext4_fc_stats s_fc_stats;
u64 s_fc_avg_commit_time;
};

static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
Expand Down Expand Up @@ -1723,6 +1776,7 @@ enum {
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
EXT4_STATE_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
EXT4_STATE_FC_COMMITTING, /* Fast commit ongoing */
};

#define EXT4_INODE_BIT_FNS(name, field, offset) \
Expand Down Expand Up @@ -2682,6 +2736,22 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
/* fast_commit.c */

/*
 * Fast commit interface. The tracking helpers record metadata changes that
 * a later fast commit has to write out; ext4_fc_commit() is the main entry
 * point of the commit path.
 */
void ext4_fc_init(struct super_block *sb, journal_t *journal);
void ext4_fc_init_inode(struct inode *inode);
/*
 * Track changed logical block offsets of an inode. The visible callers pass
 * the first and the last (inclusive) logical block of the changed range.
 */
void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
ext4_lblk_t end);
/* Track unlink, link and create operations. */
void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry);
void ext4_fc_track_link(struct inode *inode, struct dentry *dentry);
void ext4_fc_track_create(struct inode *inode, struct dentry *dentry);
/* Track an inode. */
void ext4_fc_track_inode(struct inode *inode);
/* Mark the file system as fast commit ineligible. */
void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
/*
 * The start/stop pairs below co-ordinate inode updates with commits.
 * Callers are expected to pair every start with a matching stop.
 */
void ext4_fc_start_ineligible(struct super_block *sb, int reason);
void ext4_fc_stop_ineligible(struct super_block *sb);
void ext4_fc_start_update(struct inode *inode);
void ext4_fc_stop_update(struct inode *inode);
/* NOTE(review): presumably drops the inode from fast commit tracking - confirm. */
void ext4_fc_del(struct inode *inode);
/* Main entry point to the fast commit path. */
int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
int __init ext4_fc_init_dentry_cache(void);

/* mballoc.c */
extern const struct seq_operations ext4_mb_seq_groups_ops;
extern long ext4_mb_stats;
Expand Down
48 changes: 34 additions & 14 deletions fs/ext4/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -3723,6 +3723,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
out:
ext4_ext_show_leaf(inode, path);
ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
return err;
}

Expand Down Expand Up @@ -3794,6 +3795,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
if (*allocated > map->m_len)
*allocated = map->m_len;
map->m_len = *allocated;
ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
return 0;
}

Expand Down Expand Up @@ -4327,7 +4329,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_len = ar.len;
allocated = map->m_len;
ext4_ext_show_leaf(inode, path);

ext4_fc_track_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1);
out:
ext4_ext_drop_refs(path);
kfree(path);
Expand Down Expand Up @@ -4600,7 +4602,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
ret = ext4_mark_inode_dirty(handle, inode);
if (unlikely(ret))
goto out_handle;

ext4_fc_track_range(inode, offset >> inode->i_sb->s_blocksize_bits,
(offset + len - 1) >> inode->i_sb->s_blocksize_bits);
/* Zero out partial block at the edges of the range */
ret = ext4_zero_partial_blocks(handle, inode, offset, len);
if (ret >= 0)
Expand Down Expand Up @@ -4648,23 +4651,34 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
ext4_fc_track_range(inode, offset >> blkbits,
(offset + len - 1) >> blkbits);

if (mode & FALLOC_FL_PUNCH_HOLE)
return ext4_punch_hole(inode, offset, len);
ext4_fc_start_update(inode);

if (mode & FALLOC_FL_PUNCH_HOLE) {
ret = ext4_punch_hole(inode, offset, len);
goto exit;
}

ret = ext4_convert_inline_data(inode);
if (ret)
return ret;
goto exit;

if (mode & FALLOC_FL_COLLAPSE_RANGE)
return ext4_collapse_range(inode, offset, len);

if (mode & FALLOC_FL_INSERT_RANGE)
return ext4_insert_range(inode, offset, len);
if (mode & FALLOC_FL_COLLAPSE_RANGE) {
ret = ext4_collapse_range(inode, offset, len);
goto exit;
}

if (mode & FALLOC_FL_ZERO_RANGE)
return ext4_zero_range(file, offset, len, mode);
if (mode & FALLOC_FL_INSERT_RANGE) {
ret = ext4_insert_range(inode, offset, len);
goto exit;
}

if (mode & FALLOC_FL_ZERO_RANGE) {
ret = ext4_zero_range(file, offset, len, mode);
goto exit;
}
trace_ext4_fallocate_enter(inode, offset, len, mode);
lblk = offset >> blkbits;

Expand Down Expand Up @@ -4698,12 +4712,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out;

if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
EXT4_I(inode)->i_sync_tid);
ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
EXT4_I(inode)->i_sync_tid);
}
out:
inode_unlock(inode);
trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
exit:
ext4_fc_stop_update(inode);
return ret;
}

Expand Down Expand Up @@ -5291,6 +5307,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle);
goto out_mmap;
}
ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);

down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0);
Expand Down Expand Up @@ -5329,6 +5346,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)

out_stop:
ext4_journal_stop(handle);
ext4_fc_stop_ineligible(sb);
out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:
Expand Down Expand Up @@ -5429,6 +5447,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle);
goto out_mmap;
}
ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);

/* Expand file to avoid data loss if there is error while shifting */
inode->i_size += len;
Expand Down Expand Up @@ -5503,6 +5522,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)

out_stop:
ext4_journal_stop(handle);
ext4_fc_stop_ineligible(sb);
out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:
Expand Down

0 comments on commit aa75f4d

Please sign in to comment.