Skip to content

Commit

Permalink
NFS: Fix flexfiles read failover
Browse files Browse the repository at this point in the history
The current mirrored read failover code correctly resets the mirror
index between failed reads; however, it is not able to actually flip the
RPC call over to the next RPC client.
The end result is that we keep resending the RPC call to the same client
over and over.

The fix is to use the pnfs_read_resend_pnfs() mechanism to schedule a
new RPC call, but we need to add the ability to pass in a mirror
index so that we always retry the next mirror in the list.

Fixes: 166bd5b ("pNFS/flexfiles: Fix layoutstats handling during read failovers")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
  • Loading branch information
Trond Myklebust committed Aug 12, 2020
1 parent a503291 commit 563c53e
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 16 deletions.
50 changes: 36 additions & 14 deletions fs/nfs/flexfilelayout/flexfilelayout.c
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,19 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
}

/*
 * Select a data server for a read described by @pgio.
 *
 * The search first starts at the mirror index stored in the descriptor
 * (pgio->pg_mirror_idx). If that search finds nothing and it did not already
 * start at mirror 0, the search is repeated from mirror 0.
 *
 * @pgio:     pageio descriptor supplying the layout segment and start index
 * @best_idx: passed through to ff_layout_choose_best_ds_for_read()
 *
 * Returns whatever ff_layout_choose_best_ds_for_read() returned for the last
 * search performed (NULL when no data server was chosen).
 */
static struct nfs4_pnfs_ds *
ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, int *best_idx)
{
	struct pnfs_layout_segment *segment = pgio->pg_lseg;
	struct nfs4_pnfs_ds *chosen;

	chosen = ff_layout_choose_best_ds_for_read(segment, pgio->pg_mirror_idx,
						   best_idx);
	/* Nothing found from a non-zero start: wrap around and retry from 0. */
	if (!chosen && pgio->pg_mirror_idx)
		chosen = ff_layout_choose_best_ds_for_read(segment, 0, best_idx);

	return chosen;
}

static void
ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req,
Expand Down Expand Up @@ -840,7 +853,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
goto out_nolseg;
}

ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
if (!ds) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
Expand Down Expand Up @@ -1022,11 +1035,24 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
}
}

/*
 * Reschedule a failed pNFS read so that it is retried on another mirror.
 *
 * The search for a usable data server starts at the mirror immediately
 * following the one that just failed (hdr->pgio_mirror_idx + 1). The start
 * index must not be incremented a second time: doing so would skip the next
 * mirror in the list and, with only two mirrors, would start the search past
 * the remaining one.
 *
 * If a data server is found, a layout error is sent for the segment;
 * otherwise the layout is marked for return. In both cases the read is
 * resent through pnfs_read_resend_pnfs() with new_idx, which stays 0 unless
 * ff_layout_choose_any_ds_for_read() updated it.
 *
 * @hdr: header of the read that is being failed over
 */
static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
{
	/* First candidate: the mirror right after the one that failed. */
	u32 idx = hdr->pgio_mirror_idx + 1;
	int new_idx = 0;

	if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx))
		ff_layout_send_layouterror(hdr->lseg);
	else
		pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
	pnfs_read_resend_pnfs(hdr, new_idx);
}

static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
{
struct rpc_task *task = &hdr->task;

pnfs_layoutcommit_inode(hdr->inode, false);
pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);

if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
Expand Down Expand Up @@ -1228,6 +1254,12 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
break;
case NFS4ERR_NXIO:
ff_layout_mark_ds_unreachable(lseg, idx);
/*
* Don't return the layout if this is a read and we still
* have layouts to try
*/
if (opnum == OP_READ)
break;
/* Fallthrough */
default:
pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode,
Expand All @@ -1241,7 +1273,6 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
static int ff_layout_read_done_cb(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
int new_idx = hdr->pgio_mirror_idx;
int err;

if (task->tk_status < 0) {
Expand All @@ -1261,10 +1292,6 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
switch (err) {
case -NFS4ERR_RESET_TO_PNFS:
if (ff_layout_choose_best_ds_for_read(hdr->lseg,
hdr->pgio_mirror_idx + 1,
&new_idx))
goto out_layouterror;
set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
return task->tk_status;
case -NFS4ERR_RESET_TO_MDS:
Expand All @@ -1275,10 +1302,6 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
}

return 0;
out_layouterror:
ff_layout_read_record_layoutstats_done(task, hdr);
ff_layout_send_layouterror(hdr->lseg);
hdr->pgio_mirror_idx = new_idx;
out_eagain:
rpc_restart_call_prepare(task);
return -EAGAIN;
Expand Down Expand Up @@ -1405,10 +1428,9 @@ static void ff_layout_read_release(void *data)
struct nfs_pgio_header *hdr = data;

ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
ff_layout_send_layouterror(hdr->lseg);
pnfs_read_resend_pnfs(hdr);
} else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
ff_layout_resend_pnfs_read(hdr);
else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
ff_layout_reset_read(hdr);
pnfs_generic_rw_release(data);
}
Expand Down
4 changes: 3 additions & 1 deletion fs/nfs/pnfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -2939,7 +2939,8 @@ pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
}

/* Resend all requests through pnfs. */
void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr)
void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr,
unsigned int mirror_idx)
{
struct nfs_pageio_descriptor pgio;

Expand All @@ -2950,6 +2951,7 @@ void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr)

nfs_pageio_init_read(&pgio, hdr->inode, false,
hdr->completion_ops);
pgio.pg_mirror_idx = mirror_idx;
hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
}
}
Expand Down
2 changes: 1 addition & 1 deletion fs/nfs/pnfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ int _pnfs_return_layout(struct inode *);
int pnfs_commit_and_return_layout(struct inode *);
void pnfs_ld_write_done(struct nfs_pgio_header *);
void pnfs_ld_read_done(struct nfs_pgio_header *);
void pnfs_read_resend_pnfs(struct nfs_pgio_header *);
void pnfs_read_resend_pnfs(struct nfs_pgio_header *, unsigned int mirror_idx);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
Expand Down

0 comments on commit 563c53e

Please sign in to comment.