Skip to content

Commit

Permalink
Merge tag 'ceph-for-5.19-rc1' of https://github.com/ceph/ceph-client
Browse files Browse the repository at this point in the history
Pull ceph updates from Ilya Dryomov:
 "A big pile of assorted fixes and improvements for the filesystem with
  nothing in particular standing out, except perhaps that the fact that
  the MDS never really maintained atime was made official and thus it's
  no longer updated on the client either.

  We also have a MAINTAINERS update: Jeff is transitioning his
  filesystem maintainership duties to Xiubo"

* tag 'ceph-for-5.19-rc1' of https://github.com/ceph/ceph-client: (23 commits)
  MAINTAINERS: move myself from ceph "Maintainer" to "Reviewer"
  ceph: fix decoding of client session messages flags
  ceph: switch TASK_INTERRUPTIBLE to TASK_KILLABLE
  ceph: remove redundant variable ino
  ceph: try to queue a writeback if revoking fails
  ceph: fix statfs for subdir mounts
  ceph: fix possible deadlock when holding Fwb to get inline_data
  ceph: redirty the page for writepage on failure
  ceph: try to choose the auth MDS if possible for getattr
  ceph: disable updating the atime since cephfs won't maintain it
  ceph: flush the mdlog for filesystem sync
  ceph: rename unsafe_request_wait()
  libceph: use swap() macro instead of taking tmp variable
  ceph: fix statx AT_STATX_DONT_SYNC vs AT_STATX_FORCE_SYNC check
  ceph: no need to invalidate the fscache twice
  ceph: replace usage of found with dedicated list iterator variable
  ceph: use dedicated list iterator variable
  ceph: update the dlease for the hashed dentry when removing
  ceph: stop retrying the request when exceeding 256 times
  ceph: stop forwarding the request when exceeding 256 times
  ...
  • Loading branch information
torvalds committed Jun 2, 2022
2 parents 7c9e960 + af7dc8e commit 17d8e3d
Show file tree
Hide file tree
Showing 12 changed files with 253 additions and 103 deletions.
4 changes: 2 additions & 2 deletions MAINTAINERS
Expand Up @@ -4566,8 +4566,8 @@ F: drivers/power/supply/cw2015_battery.c

CEPH COMMON CODE (LIBCEPH)
M: Ilya Dryomov <idryomov@gmail.com>
M: Jeff Layton <jlayton@kernel.org>
M: Xiubo Li <xiubli@redhat.com>
R: Jeff Layton <jlayton@kernel.org>
L: ceph-devel@vger.kernel.org
S: Supported
W: http://ceph.com/
Expand All @@ -4577,9 +4577,9 @@ F: include/linux/crush/
F: net/ceph/

CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
M: Jeff Layton <jlayton@kernel.org>
M: Xiubo Li <xiubli@redhat.com>
M: Ilya Dryomov <idryomov@gmail.com>
R: Jeff Layton <jlayton@kernel.org>
L: ceph-devel@vger.kernel.org
S: Supported
W: http://ceph.com/
Expand Down
13 changes: 6 additions & 7 deletions drivers/block/rbd.c
Expand Up @@ -756,24 +756,23 @@ static struct rbd_client *__rbd_get_client(struct rbd_client *rbdc)
*/
static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
{
struct rbd_client *client_node;
bool found = false;
struct rbd_client *rbdc = NULL, *iter;

if (ceph_opts->flags & CEPH_OPT_NOSHARE)
return NULL;

spin_lock(&rbd_client_list_lock);
list_for_each_entry(client_node, &rbd_client_list, node) {
if (!ceph_compare_options(ceph_opts, client_node->client)) {
__rbd_get_client(client_node);
list_for_each_entry(iter, &rbd_client_list, node) {
if (!ceph_compare_options(ceph_opts, iter->client)) {
__rbd_get_client(iter);

found = true;
rbdc = iter;
break;
}
}
spin_unlock(&rbd_client_list_lock);

return found ? client_node : NULL;
return rbdc;
}

/*
Expand Down
42 changes: 25 additions & 17 deletions fs/ceph/addr.c
Expand Up @@ -256,6 +256,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
struct iov_iter iter;
ssize_t err = 0;
size_t len;
int mode;

__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
Expand All @@ -264,7 +265,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
goto out;

/* We need to fetch the inline data. */
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
mode = ceph_try_to_choose_auth_mds(inode, CEPH_STAT_CAP_INLINE_DATA);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto out;
Expand Down Expand Up @@ -604,8 +606,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
ceph_wbc.truncate_seq, ceph_wbc.truncate_size,
true);
if (IS_ERR(req))
if (IS_ERR(req)) {
redirty_page_for_writepage(wbc, page);
return PTR_ERR(req);
}

set_page_writeback(page);
if (caching)
Expand Down Expand Up @@ -1644,18 +1648,31 @@ int ceph_uninline_data(struct file *file)
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_request *req;
struct ceph_osd_request *req = NULL;
struct ceph_cap_flush *prealloc_cf;
struct folio *folio = NULL;
u64 inline_version = CEPH_INLINE_NONE;
struct page *pages[1];
int err = 0;
u64 len;

spin_lock(&ci->i_ceph_lock);
inline_version = ci->i_inline_version;
spin_unlock(&ci->i_ceph_lock);

dout("uninline_data %p %llx.%llx inline_version %llu\n",
inode, ceph_vinop(inode), inline_version);

if (inline_version == CEPH_INLINE_NONE)
return 0;

prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return -ENOMEM;

if (inline_version == 1) /* initial version, no data */
goto out_uninline;

folio = read_mapping_folio(inode->i_mapping, 0, file);
if (IS_ERR(folio)) {
err = PTR_ERR(folio);
Expand All @@ -1664,17 +1681,6 @@ int ceph_uninline_data(struct file *file)

folio_lock(folio);

spin_lock(&ci->i_ceph_lock);
inline_version = ci->i_inline_version;
spin_unlock(&ci->i_ceph_lock);

dout("uninline_data %p %llx.%llx inline_version %llu\n",
inode, ceph_vinop(inode), inline_version);

if (inline_version == 1 || /* initial version, no data */
inline_version == CEPH_INLINE_NONE)
goto out_unlock;

len = i_size_read(inode);
if (len > folio_size(folio))
len = folio_size(folio);
Expand Down Expand Up @@ -1739,6 +1745,7 @@ int ceph_uninline_data(struct file *file)
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, len, err);

out_uninline:
if (!err) {
int dirty;

Expand All @@ -1757,8 +1764,10 @@ int ceph_uninline_data(struct file *file)
if (err == -ECANCELED)
err = 0;
out_unlock:
folio_unlock(folio);
folio_put(folio);
if (folio) {
folio_unlock(folio);
folio_put(folio);
}
out:
ceph_free_cap_flush(prealloc_cf);
dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
Expand All @@ -1777,7 +1786,6 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma)

if (!mapping->a_ops->read_folio)
return -ENOEXEC;
file_accessed(file);
vma->vm_ops = &ceph_vmops;
return 0;
}
Expand Down
75 changes: 47 additions & 28 deletions fs/ceph/caps.c
Expand Up @@ -1577,7 +1577,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci,

while (first_tid <= last_tid) {
struct ceph_cap *cap = ci->i_auth_cap;
struct ceph_cap_flush *cf;
struct ceph_cap_flush *cf = NULL, *iter;
int ret;

if (!(cap && cap->session == session)) {
Expand All @@ -1587,8 +1587,9 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci,
}

ret = -ENOENT;
list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
if (cf->tid >= first_tid) {
list_for_each_entry(iter, &ci->i_cap_flush_list, i_list) {
if (iter->tid >= first_tid) {
cf = iter;
ret = 0;
break;
}
Expand Down Expand Up @@ -1910,6 +1911,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct rb_node *p;
bool queue_invalidate = false;
bool tried_invalidate = false;
bool queue_writeback = false;

if (session)
ceph_get_mds_session(session);
Expand Down Expand Up @@ -2062,10 +2064,27 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
}

/* completed revocation? going down and there are no caps? */
if (revoking && (revoking & cap_used) == 0) {
dout("completed revocation of %s\n",
ceph_cap_string(cap->implemented & ~cap->issued));
goto ack;
if (revoking) {
if ((revoking & cap_used) == 0) {
dout("completed revocation of %s\n",
ceph_cap_string(cap->implemented & ~cap->issued));
goto ack;
}

/*
* If the "i_wrbuffer_ref" was increased by mmap or generic
* cache write just before the ceph_check_caps() is called,
* the Fb capability revoking will fail this time. Then we
* must wait for the BDI's delayed work to flush the dirty
* pages and to release the "i_wrbuffer_ref", which will cost
* at most 5 seconds. That means the MDS needs to wait at
* most 5 seconds to finished the Fb capability's revocation.
*
* Let's queue a writeback for it.
*/
if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
(revoking & CEPH_CAP_FILE_BUFFER))
queue_writeback = true;
}

/* want more caps from mds? */
Expand Down Expand Up @@ -2135,6 +2154,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
spin_unlock(&ci->i_ceph_lock);

ceph_put_mds_session(session);
if (queue_writeback)
ceph_queue_writeback(inode);
if (queue_invalidate)
ceph_queue_invalidate(inode);
}
Expand Down Expand Up @@ -2218,9 +2239,9 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid)
}

/*
* wait for any unsafe requests to complete.
* flush the mdlog and wait for any unsafe requests to complete.
*/
static int unsafe_request_wait(struct inode *inode)
static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
{
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
Expand Down Expand Up @@ -2336,7 +2357,7 @@ static int unsafe_request_wait(struct inode *inode)
kfree(sessions);
}

dout("unsafe_request_wait %p wait on tid %llu %llu\n",
dout("%s %p wait on tid %llu %llu\n", __func__,
inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL);
if (req1) {
ret = !wait_for_completion_timeout(&req1->r_safe_completion,
Expand Down Expand Up @@ -2380,7 +2401,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
dirty = try_flush_caps(inode, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));

err = unsafe_request_wait(inode);
err = flush_mdlog_and_wait_inode_unsafe_requests(inode);

/*
* only wait on non-file metadata writeback (the mds
Expand Down Expand Up @@ -3182,10 +3203,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc)
{
struct inode *inode = &ci->vfs_inode;
struct ceph_cap_snap *capsnap = NULL;
struct ceph_cap_snap *capsnap = NULL, *iter;
int put = 0;
bool last = false;
bool found = false;
bool flush_snaps = false;
bool complete_capsnap = false;

Expand All @@ -3212,14 +3232,14 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
last ? " LAST" : "");
} else {
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
if (capsnap->context == snapc) {
found = true;
list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) {
if (iter->context == snapc) {
capsnap = iter;
break;
}
}

if (!found) {
if (!capsnap) {
/*
* The capsnap should already be removed when removing
* auth cap in the case of a forced unmount.
Expand Down Expand Up @@ -3769,35 +3789,34 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
u64 follows = le64_to_cpu(m->snap_follows);
struct ceph_cap_snap *capsnap;
bool flushed = false;
struct ceph_cap_snap *capsnap = NULL, *iter;
bool wake_ci = false;
bool wake_mdsc = false;

dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
inode, ci, session->s_mds, follows);

spin_lock(&ci->i_ceph_lock);
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
if (capsnap->follows == follows) {
if (capsnap->cap_flush.tid != flush_tid) {
list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) {
if (iter->follows == follows) {
if (iter->cap_flush.tid != flush_tid) {
dout(" cap_snap %p follows %lld tid %lld !="
" %lld\n", capsnap, follows,
flush_tid, capsnap->cap_flush.tid);
" %lld\n", iter, follows,
flush_tid, iter->cap_flush.tid);
break;
}
flushed = true;
capsnap = iter;
break;
} else {
dout(" skipping cap_snap %p follows %lld\n",
capsnap, capsnap->follows);
iter, iter->follows);
}
}
if (flushed)
if (capsnap)
ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
spin_unlock(&ci->i_ceph_lock);

if (flushed) {
if (capsnap) {
ceph_put_snap_context(capsnap->context);
ceph_put_cap_snap(capsnap);
if (wake_ci)
Expand Down

0 comments on commit 17d8e3d

Please sign in to comment.