Skip to content

Commit

Permalink
nvme-rdma: avoid race between time out and tear down
Browse files Browse the repository at this point in the history
[ Upstream commit 3017013 ]

Now use teardown_lock to serialize for time out and tear down. This may
cause abnormal: first cancel all request in tear down, then time out may
complete the request again, but the request may already be freed or
restarted.

To avoid race between time out and tear down, in tear down process,
first we quiesce the queue, and then delete the timer and cancel
the time out work for the queue. At the same time we need to delete
teardown_lock.

Signed-off-by: Chao Leng <lengchao@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
lostandy26 authored and gregkh committed Nov 18, 2020
1 parent c28eec9 commit 987a990
Showing 1 changed file with 2 additions and 10 deletions.
12 changes: 2 additions & 10 deletions drivers/nvme/host/rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ struct nvme_rdma_ctrl {
struct sockaddr_storage src_addr;

struct nvme_ctrl ctrl;
struct mutex teardown_lock;
bool use_inline_data;
u32 io_queues[HCTX_MAX_TYPES];
};
Expand Down Expand Up @@ -1010,8 +1009,8 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
mutex_lock(&ctrl->teardown_lock);
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_sync_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
if (ctrl->ctrl.admin_tagset) {
blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset,
Expand All @@ -1021,16 +1020,15 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (remove)
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_destroy_admin_queue(ctrl, remove);
mutex_unlock(&ctrl->teardown_lock);
}

static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
mutex_lock(&ctrl->teardown_lock);
if (ctrl->ctrl.queue_count > 1) {
nvme_start_freeze(&ctrl->ctrl);
nvme_stop_queues(&ctrl->ctrl);
nvme_sync_io_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
if (ctrl->ctrl.tagset) {
blk_mq_tagset_busy_iter(ctrl->ctrl.tagset,
Expand All @@ -1041,7 +1039,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
nvme_start_queues(&ctrl->ctrl);
nvme_rdma_destroy_io_queues(ctrl, remove);
}
mutex_unlock(&ctrl->teardown_lock);
}

static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
Expand Down Expand Up @@ -1975,16 +1972,12 @@ static void nvme_rdma_complete_timed_out(struct request *rq)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;

/* fence other contexts that may complete the command */
mutex_lock(&ctrl->teardown_lock);
nvme_rdma_stop_queue(queue);
if (!blk_mq_request_completed(rq)) {
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
blk_mq_complete_request(rq);
}
mutex_unlock(&ctrl->teardown_lock);
}

static enum blk_eh_timer_return
Expand Down Expand Up @@ -2319,7 +2312,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
return ERR_PTR(-ENOMEM);
ctrl->ctrl.opts = opts;
INIT_LIST_HEAD(&ctrl->list);
mutex_init(&ctrl->teardown_lock);

if (!(opts->mask & NVMF_OPT_TRSVCID)) {
opts->trsvcid =
Expand Down

0 comments on commit 987a990

Please sign in to comment.