Skip to content

Commit

Permalink
block/rnbd-clt: Support polling mode for IO latency optimization
Browse files Browse the repository at this point in the history
RNBD can make double-queues for irq-mode and poll-mode.
For example, on 4-CPU system 8 request-queues are created,
4 for irq-mode and 4 for poll-mode.
If the IO has HIPRI flag, the block-layer will call .poll function
of RNBD. Then IO is sent to the poll-mode queue.
Add optional nr_poll_queues argument for map_devices interface.

To support polling in RNBD, the RTRS client creates connections
for both irq-mode and direct-poll-mode.

For example, on a 4-CPU system it could previously create 5 connections:
con[0] => user message (softirq cq)
con[1:4] => softirq cq

After this patch, it can create 9 connections:
con[0] => user message (softirq cq)
con[1:4] => softirq cq
con[5:8] => DIRECT-POLL cq

Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: linux-rdma@vger.kernel.org
Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20210419073722.15351-14-gi-oh.kim@ionos.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
Gioh Kim authored and axboe committed Apr 20, 2021
1 parent 12b0653 commit 2958a99
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 34 deletions.
55 changes: 44 additions & 11 deletions drivers/block/rnbd/rnbd-clt-sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ enum {
RNBD_OPT_DEV_PATH = 1 << 2,
RNBD_OPT_ACCESS_MODE = 1 << 3,
RNBD_OPT_SESSNAME = 1 << 6,
RNBD_OPT_NR_POLL_QUEUES = 1 << 7,
};

static const unsigned int rnbd_opt_mandatory[] = {
Expand All @@ -42,12 +43,13 @@ static const unsigned int rnbd_opt_mandatory[] = {
};

static const match_table_t rnbd_opt_tokens = {
{RNBD_OPT_PATH, "path=%s" },
{RNBD_OPT_DEV_PATH, "device_path=%s"},
{RNBD_OPT_DEST_PORT, "dest_port=%d" },
{RNBD_OPT_ACCESS_MODE, "access_mode=%s"},
{RNBD_OPT_SESSNAME, "sessname=%s" },
{RNBD_OPT_ERR, NULL },
{RNBD_OPT_PATH, "path=%s" },
{RNBD_OPT_DEV_PATH, "device_path=%s" },
{RNBD_OPT_DEST_PORT, "dest_port=%d" },
{RNBD_OPT_ACCESS_MODE, "access_mode=%s" },
{RNBD_OPT_SESSNAME, "sessname=%s" },
{RNBD_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" },
{RNBD_OPT_ERR, NULL },
};

struct rnbd_map_options {
Expand All @@ -57,6 +59,7 @@ struct rnbd_map_options {
char *pathname;
u16 *dest_port;
enum rnbd_access_mode *access_mode;
u32 *nr_poll_queues;
};

static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
Expand All @@ -68,7 +71,7 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
int opt_mask = 0;
int token;
int ret = -EINVAL;
int i, dest_port;
int i, dest_port, nr_poll_queues;
int p_cnt = 0;

options = kstrdup(buf, GFP_KERNEL);
Expand Down Expand Up @@ -178,6 +181,19 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
kfree(p);
break;

case RNBD_OPT_NR_POLL_QUEUES:
if (match_int(args, &nr_poll_queues) || nr_poll_queues < -1 ||
nr_poll_queues > (int)nr_cpu_ids) {
pr_err("bad nr_poll_queues parameter '%d'\n",
nr_poll_queues);
ret = -EINVAL;
goto out;
}
if (nr_poll_queues == -1)
nr_poll_queues = nr_cpu_ids;
*opt->nr_poll_queues = nr_poll_queues;
break;

default:
pr_err("map_device: Unknown parameter or missing value '%s'\n",
p);
Expand Down Expand Up @@ -227,6 +243,19 @@ static ssize_t state_show(struct kobject *kobj,

static struct kobj_attribute rnbd_clt_state_attr = __ATTR_RO(state);

/*
 * sysfs show handler for the per-device "nr_poll_queues" attribute.
 *
 * @kobj: kobject embedded in the struct rnbd_clt_dev being queried
 * @attr: attribute descriptor (unused)
 * @page: output buffer supplied by sysfs
 *
 * Emits the device's nr_poll_queues value followed by a newline and
 * returns whatever sysfs_emit() returns.
 */
static ssize_t nr_poll_queues_show(struct kobject *kobj,
				   struct kobj_attribute *attr, char *page)
{
	struct rnbd_clt_dev *dev;

	dev = container_of(kobj, struct rnbd_clt_dev, kobj);

	/* nr_poll_queues is a u32, so print it with an unsigned conversion */
	return sysfs_emit(page, "%u\n", dev->nr_poll_queues);
}

/*
 * kobject attribute for "nr_poll_queues"; listed in rnbd_dev_attrs[].
 * NOTE(review): __ATTR_RO() presumably binds this to nr_poll_queues_show()
 * with read-only permissions - confirm against <linux/sysfs.h>.
 */
static struct kobj_attribute rnbd_clt_nr_poll_queues =
__ATTR_RO(nr_poll_queues);

static ssize_t mapping_path_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
Expand Down Expand Up @@ -421,6 +450,7 @@ static struct attribute *rnbd_dev_attrs[] = {
&rnbd_clt_state_attr.attr,
&rnbd_clt_session_attr.attr,
&rnbd_clt_access_mode.attr,
&rnbd_clt_nr_poll_queues.attr,
NULL,
};

Expand Down Expand Up @@ -469,7 +499,7 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj,
char *page)
{
return scnprintf(page, PAGE_SIZE,
"Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
"Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
attr->attr.name);
}

Expand Down Expand Up @@ -541,6 +571,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
char sessname[NAME_MAX];
enum rnbd_access_mode access_mode = RNBD_ACCESS_RW;
u16 port_nr = RTRS_PORT;
u32 nr_poll_queues = 0;

struct sockaddr_storage *addrs;
struct rtrs_addr paths[6];
Expand All @@ -552,6 +583,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
opt.pathname = pathname;
opt.dest_port = &port_nr;
opt.access_mode = &access_mode;
opt.nr_poll_queues = &nr_poll_queues;
addrs = kcalloc(ARRAY_SIZE(paths) * 2, sizeof(*addrs), GFP_KERNEL);
if (!addrs)
return -ENOMEM;
Expand All @@ -565,12 +597,13 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
if (ret)
goto out;

pr_info("Mapping device %s on session %s, (access_mode: %s)\n",
pr_info("Mapping device %s on session %s, (access_mode: %s, nr_poll_queues: %d)\n",
pathname, sessname,
rnbd_access_mode_str(access_mode));
rnbd_access_mode_str(access_mode),
nr_poll_queues);

dev = rnbd_clt_map_device(sessname, paths, path_cnt, port_nr, pathname,
access_mode);
access_mode, nr_poll_queues);
if (IS_ERR(dev)) {
ret = PTR_ERR(dev);
goto out;
Expand Down
89 changes: 81 additions & 8 deletions drivers/block/rnbd/rnbd-clt.c
Original file line number Diff line number Diff line change
Expand Up @@ -1165,9 +1165,54 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
return ret;
}

/*
 * ->poll callback registered in rnbd_mq_ops.
 *
 * Looks up the RTRS session behind the hardware context's rnbd_queue and
 * asks RTRS to process the completion queue selected by the hctx's
 * queue number. The result of rtrs_clt_rdma_cq_direct() is returned
 * unchanged (presumably the number of completions handled - confirm
 * against the RTRS client API).
 */
static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx)
{
	struct rnbd_queue *q = hctx->driver_data;

	return rtrs_clt_rdma_cq_direct(q->dev->sess->rtrs, hctx->queue_num);
}

/*
 * ->map_queues callback registered in rnbd_mq_ops.
 *
 * The DEFAULT and READ maps both start at hardware queue 0 and each
 * cover num_online_cpus() queues, i.e. they share one set of queues.
 * When the session was created with poll queues, the POLL map is placed
 * directly behind the READ map and covers sess->nr_poll_queues queues.
 *
 * Always returns 0.
 */
static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
{
	struct rnbd_clt_session *sess = set->driver_data;

	/* read and write requests share the same hardware queues */
	set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
	set->map[HCTX_TYPE_DEFAULT].nr_queues = num_online_cpus();
	set->map[HCTX_TYPE_READ].queue_offset = 0;
	set->map[HCTX_TYPE_READ].nr_queues = num_online_cpus();
	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);

	if (!sess->nr_poll_queues) {
		/* no polling requested: only the shared irq-mode queues exist */
		pr_info("[session=%s] mapped %d/%d default/read queues.\n",
			sess->sessname,
			set->map[HCTX_TYPE_DEFAULT].nr_queues,
			set->map[HCTX_TYPE_READ].nr_queues);
		return 0;
	}

	/* poll queues are dedicated and follow the READ range */
	set->map[HCTX_TYPE_POLL].queue_offset =
		set->map[HCTX_TYPE_READ].queue_offset +
		set->map[HCTX_TYPE_READ].nr_queues;
	set->map[HCTX_TYPE_POLL].nr_queues = sess->nr_poll_queues;
	blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
	pr_info("[session=%s] mapped %d/%d/%d default/read/poll queues.\n",
		sess->sessname,
		set->map[HCTX_TYPE_DEFAULT].nr_queues,
		set->map[HCTX_TYPE_READ].nr_queues,
		set->map[HCTX_TYPE_POLL].nr_queues);

	return 0;
}

/* blk-mq operations used by the RNBD client, including polling support. */
static struct blk_mq_ops rnbd_mq_ops = {
	.queue_rq	= rnbd_queue_rq,
	.poll		= rnbd_rdma_poll,
	.map_queues	= rnbd_rdma_map_queues,
	.complete	= rnbd_softirq_done_fn,
};

static int setup_mq_tags(struct rnbd_clt_session *sess)
Expand All @@ -1181,15 +1226,23 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
tag_set->flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_TAG_QUEUE_SHARED;
tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE;
tag_set->nr_hw_queues = num_online_cpus();

/* for HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL */
tag_set->nr_maps = sess->nr_poll_queues ? HCTX_MAX_TYPES : 2;
/*
* HCTX_TYPE_DEFAULT and HCTX_TYPE_READ share one set of queues
* others are for HCTX_TYPE_POLL
*/
tag_set->nr_hw_queues = num_online_cpus() + sess->nr_poll_queues;
tag_set->driver_data = sess;

return blk_mq_alloc_tag_set(tag_set);
}

static struct rnbd_clt_session *
find_and_get_or_create_sess(const char *sessname,
const struct rtrs_addr *paths,
size_t path_cnt, u16 port_nr)
size_t path_cnt, u16 port_nr, u32 nr_poll_queues)
{
struct rnbd_clt_session *sess;
struct rtrs_attrs attrs;
Expand All @@ -1198,6 +1251,17 @@ find_and_get_or_create_sess(const char *sessname,
struct rtrs_clt_ops rtrs_ops;

sess = find_or_create_sess(sessname, &first);
if (sess == ERR_PTR(-ENOMEM))
return ERR_PTR(-ENOMEM);
else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
/*
* A device MUST have its own session to use the polling-mode.
* It must fail to map new device with the same session.
*/
err = -EINVAL;
goto put_sess;
}

if (!first)
return sess;

Expand All @@ -1219,14 +1283,15 @@ find_and_get_or_create_sess(const char *sessname,
0, /* Do not use pdu of rtrs */
RECONNECT_DELAY, BMAX_SEGMENTS,
BLK_MAX_SEGMENT_SIZE,
MAX_RECONNECTS);
MAX_RECONNECTS, nr_poll_queues);
if (IS_ERR(sess->rtrs)) {
err = PTR_ERR(sess->rtrs);
goto wake_up_and_put;
}
rtrs_clt_query(sess->rtrs, &attrs);
sess->max_io_size = attrs.max_io_size;
sess->queue_depth = attrs.queue_depth;
sess->nr_poll_queues = nr_poll_queues;

err = setup_mq_tags(sess);
if (err)
Expand Down Expand Up @@ -1370,7 +1435,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)

static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
enum rnbd_access_mode access_mode,
const char *pathname)
const char *pathname,
u32 nr_poll_queues)
{
struct rnbd_clt_dev *dev;
int ret;
Expand All @@ -1379,7 +1445,12 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
if (!dev)
return ERR_PTR(-ENOMEM);

dev->hw_queues = kcalloc(nr_cpu_ids, sizeof(*dev->hw_queues),
/*
* nr_cpu_ids: the number of softirq queues
* nr_poll_queues: the number of polling queues
*/
dev->hw_queues = kcalloc(nr_cpu_ids + nr_poll_queues,
sizeof(*dev->hw_queues),
GFP_KERNEL);
if (!dev->hw_queues) {
ret = -ENOMEM;
Expand All @@ -1405,6 +1476,7 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
dev->clt_device_id = ret;
dev->sess = sess;
dev->access_mode = access_mode;
dev->nr_poll_queues = nr_poll_queues;
mutex_init(&dev->lock);
refcount_set(&dev->refcount, 1);
dev->dev_state = DEV_STATE_INIT;
Expand Down Expand Up @@ -1491,7 +1563,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
struct rtrs_addr *paths,
size_t path_cnt, u16 port_nr,
const char *pathname,
enum rnbd_access_mode access_mode)
enum rnbd_access_mode access_mode,
u32 nr_poll_queues)
{
struct rnbd_clt_session *sess;
struct rnbd_clt_dev *dev;
Expand All @@ -1500,11 +1573,11 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
if (unlikely(exists_devpath(pathname, sessname)))
return ERR_PTR(-EEXIST);

sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr);
sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues);
if (IS_ERR(sess))
return ERR_CAST(sess);

dev = init_dev(sess, access_mode, pathname);
dev = init_dev(sess, access_mode, pathname, nr_poll_queues);
if (IS_ERR(dev)) {
pr_err("map_device: failed to map device '%s' from session %s, can't initialize device, err: %ld\n",
pathname, sess->sessname, PTR_ERR(dev));
Expand Down
5 changes: 4 additions & 1 deletion drivers/block/rnbd/rnbd-clt.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ struct rnbd_clt_session {
int queue_depth;
u32 max_io_size;
struct blk_mq_tag_set tag_set;
u32 nr_poll_queues;
struct mutex lock; /* protects state and devs_list */
struct list_head devs_list; /* list of struct rnbd_clt_dev */
refcount_t refcount;
Expand Down Expand Up @@ -118,6 +119,7 @@ struct rnbd_clt_dev {
enum rnbd_clt_dev_state dev_state;
char *pathname;
enum rnbd_access_mode access_mode;
u32 nr_poll_queues;
bool read_only;
bool rotational;
bool wc;
Expand Down Expand Up @@ -147,7 +149,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
struct rtrs_addr *paths,
size_t path_cnt, u16 port_nr,
const char *pathname,
enum rnbd_access_mode access_mode);
enum rnbd_access_mode access_mode,
u32 nr_poll_queues);
int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
const struct attribute *sysfs_self);

Expand Down

0 comments on commit 2958a99

Please sign in to comment.