Skip to content

Commit

Permalink
RDMA: Add support to dump resource tracker in RAW format
Browse files Browse the repository at this point in the history
Add support for dumping resources in raw format. This enables drivers to
return the entire device-specific QP/CQ/MR context without requiring the
driver to set each field separately.

The raw query returns only the device-specific data; general data is still
returned by the existing queries.

Example:

$ rdma res show mr dev mlx5_1 mrn 2 -r -j
[{"ifindex":7,"ifname":"mlx5_1",
"data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}]

Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
  • Loading branch information
Maor Gottlieb authored and jgunthorpe committed Jun 24, 2020
1 parent 211cd94 commit 6595952
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 62 deletions.
3 changes: 3 additions & 0 deletions drivers/infiniband/core/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -2619,8 +2619,11 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, enable_driver);
SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry);
SET_DEVICE_OP(dev_ops, fill_res_cq_entry);
SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw);
SET_DEVICE_OP(dev_ops, fill_res_mr_entry);
SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw);
SET_DEVICE_OP(dev_ops, fill_res_qp_entry);
SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw);
SET_DEVICE_OP(dev_ops, fill_stat_mr_entry);
SET_DEVICE_OP(dev_ops, get_dev_fw_str);
SET_DEVICE_OP(dev_ops, get_dma_mr);
Expand Down
180 changes: 118 additions & 62 deletions drivers/infiniband/core/nldev.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY },
[RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
Expand Down Expand Up @@ -446,11 +447,11 @@ static int fill_res_name_pid(struct sk_buff *msg,
return err ? -EMSGSIZE : 0;
}

static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
static int fill_res_qp_entry_query(struct sk_buff *msg,
struct rdma_restrack_entry *res,
struct ib_device *dev,
struct ib_qp *qp)
{
struct ib_qp *qp = container_of(res, struct ib_qp, res);
struct ib_device *dev = qp->device;
struct ib_qp_init_attr qp_init_attr;
struct ib_qp_attr qp_attr;
int ret;
Expand All @@ -459,16 +460,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (ret)
return ret;

if (port && port != qp_attr.port_num)
return -EAGAIN;

/* In create_qp() port is not set yet */
if (qp_attr.port_num &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
goto err;

if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
goto err;
if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
qp_attr.dest_qp_num))
Expand All @@ -492,20 +483,55 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
goto err;

if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
goto err;

if (fill_res_name_pid(msg, res))
goto err;

if (dev->ops.fill_res_qp_entry)
return dev->ops.fill_res_qp_entry(msg, qp);
return 0;

err: return -EMSGSIZE;
}

/*
 * Fill the generic netlink attributes of one tracked QP (port index, local
 * QP number, PD number for non-kernel resources, owner name/pid) and then
 * hand over to fill_res_qp_entry_query() for the remaining attributes.
 *
 * @msg:               netlink message being built
 * @has_cap_net_admin: not used by this function
 * @res:               restrack entry embedded in the QP
 * @port:              port filter; 0 disables filtering
 *
 * Returns 0 on success, -EAGAIN when @port is non-zero and differs from the
 * QP's port, -EMSGSIZE when an attribute could not be added to @msg, or
 * whatever fill_res_qp_entry_query() returns.
 */
static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;

	if (port && port != qp->port)
		return -EAGAIN;

	/*
	 * In create_qp() port is not set yet.
	 *
	 * A failed put is reported as -EMSGSIZE, the same code used for
	 * every other attribute in this function, rather than -EINVAL.
	 */
	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		return -EMSGSIZE;

	/* The PD number is only emitted for non-kernel resources. */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	return fill_res_qp_entry_query(msg, res, dev, qp);
}

/*
 * Emit the raw dump of a single QP by delegating to the device's
 * fill_res_qp_entry_raw op.
 *
 * Returns -EAGAIN when a non-zero @port does not match the QP's port,
 * -EINVAL when the device does not provide the op, otherwise the op's
 * return value. @has_cap_net_admin is not used here.
 */
static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);

	/* A zero port means "no filter". */
	if (port != 0 && qp->port != port)
		return -EAGAIN;

	if (qp->device->ops.fill_res_qp_entry_raw)
		return qp->device->ops.fill_res_qp_entry_raw(msg, qp);

	return -EINVAL;
}

static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
{
Expand Down Expand Up @@ -565,34 +591,42 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct ib_device *dev = cq->device;

if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
goto err;
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
goto err;
return -EMSGSIZE;

/* Poll context is only valid for kernel CQs */
if (rdma_is_kernel_res(res) &&
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
goto err;
return -EMSGSIZE;

if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
goto err;
return -EMSGSIZE;

if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
goto err;
return -EMSGSIZE;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
cq->uobject->uevent.uobject.context->res.id))
goto err;
return -EMSGSIZE;

if (fill_res_name_pid(msg, res))
goto err;
return -EMSGSIZE;

if (dev->ops.fill_res_cq_entry)
return dev->ops.fill_res_cq_entry(msg, cq);
return 0;
return (dev->ops.fill_res_cq_entry) ?
dev->ops.fill_res_cq_entry(msg, cq) : 0;
}

err: return -EMSGSIZE;
/*
 * Emit the raw dump of a single CQ by delegating to the device's
 * fill_res_cq_entry_raw op.
 *
 * Returns -EINVAL when the device does not provide the op, otherwise the
 * op's return value. @has_cap_net_admin and @port are not used here.
 */
static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);

	if (cq->device->ops.fill_res_cq_entry_raw)
		return cq->device->ops.fill_res_cq_entry_raw(msg, cq);

	return -EINVAL;
}

static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
Expand All @@ -603,30 +637,39 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,

if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
goto err;
return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
goto err;
return -EMSGSIZE;
}

if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
RDMA_NLDEV_ATTR_PAD))
goto err;
return -EMSGSIZE;

if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
goto err;
return -EMSGSIZE;

if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
goto err;
return -EMSGSIZE;

if (fill_res_name_pid(msg, res))
goto err;
return -EMSGSIZE;

if (dev->ops.fill_res_mr_entry)
return dev->ops.fill_res_mr_entry(msg, mr);
return 0;
return (dev->ops.fill_res_mr_entry) ?
dev->ops.fill_res_mr_entry(msg, mr) :
0;
}

err: return -EMSGSIZE;
/*
 * Emit the raw dump of a single MR by delegating to the fill_res_mr_entry_raw
 * op of the device that owns the MR's PD.
 *
 * Returns -EINVAL when the device does not provide the op, otherwise the
 * op's return value. @has_cap_net_admin and @port are not used here.
 */
static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);

	/* The device is reached through the MR's protection domain. */
	if (mr->pd->device->ops.fill_res_mr_entry_raw)
		return mr->pd->device->ops.fill_res_mr_entry_raw(msg, mr);

	return -EINVAL;
}

static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
Expand Down Expand Up @@ -1149,7 +1192,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,

struct nldev_fill_res_entry {
enum rdma_nldev_attr nldev_attr;
enum rdma_nldev_command nldev_cmd;
u8 flags;
u32 entry;
u32 id;
Expand All @@ -1161,40 +1203,34 @@ enum nldev_res_flags {

static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
[RDMA_RESTRACK_QP] = {
.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_LQPN,
},
[RDMA_RESTRACK_CM_ID] = {
.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
},
[RDMA_RESTRACK_CQ] = {
.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CQN,
},
[RDMA_RESTRACK_MR] = {
.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_MRN,
},
[RDMA_RESTRACK_PD] = {
.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_PDN,
},
[RDMA_RESTRACK_COUNTER] = {
.nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
Expand Down Expand Up @@ -1253,7 +1289,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
}

nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NL_GET_OP(nlh->nlmsg_type)),
0, 0);

if (fill_nldev_handle(msg, device)) {
Expand Down Expand Up @@ -1331,7 +1368,8 @@ static int res_get_common_dumpit(struct sk_buff *skb,
}

nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
0, NLM_F_MULTI);

if (fill_nldev_handle(skb, device)) {
Expand Down Expand Up @@ -1413,26 +1451,29 @@ next: idx++;
return ret;
}

#define RES_GET_FUNCS(name, type) \
static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
#define RES_GET_FUNCS(name, type) \
static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
struct netlink_callback *cb) \
{ \
return res_get_common_dumpit(skb, cb, type, \
fill_res_##name##_entry); \
} \
static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
struct nlmsghdr *nlh, \
{ \
return res_get_common_dumpit(skb, cb, type, \
fill_res_##name##_entry); \
} \
static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
struct nlmsghdr *nlh, \
struct netlink_ext_ack *extack) \
{ \
return res_get_common_doit(skb, nlh, extack, type, \
fill_res_##name##_entry); \
{ \
return res_get_common_doit(skb, nlh, extack, type, \
fill_res_##name##_entry); \
}

/*
 * Instantiate the nldev_res_get_<name>_dumpit()/_doit() handler pairs.
 * Each pair forwards to the common dumpit/doit helper with the given
 * restrack type and the matching fill_res_<name>_entry() callback.
 * The *_raw variants share the restrack type of their regular counterpart
 * and differ only in the fill callback (fill_res_<name>_raw_entry()).
 */
RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);

static LIST_HEAD(link_ops);
Expand Down Expand Up @@ -2117,6 +2158,21 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_stat_del_doit,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
.doit = nldev_res_get_qp_raw_doit,
.dump = nldev_res_get_qp_raw_dumpit,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
.doit = nldev_res_get_cq_raw_doit,
.dump = nldev_res_get_cq_raw_dumpit,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
.doit = nldev_res_get_mr_raw_doit,
.dump = nldev_res_get_mr_raw_dumpit,
.flags = RDMA_NL_ADMIN_PERM,
},
};

void __init nldev_init(void)
Expand Down
3 changes: 3 additions & 0 deletions include/rdma/ib_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2583,8 +2583,11 @@ struct ib_device_ops {
* Allows rdma drivers to add their own restrack attributes.
*/
int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
int (*fill_res_mr_entry_raw)(struct sk_buff *msg, struct ib_mr *ibmr);
int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq);
int (*fill_res_cq_entry_raw)(struct sk_buff *msg, struct ib_cq *ibcq);
int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp);
int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp);
int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id);

/* Device lifecycle callbacks */
Expand Down
8 changes: 8 additions & 0 deletions include/uapi/rdma/rdma_netlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,12 @@ enum rdma_nldev_command {

RDMA_NLDEV_CMD_STAT_DEL,

RDMA_NLDEV_CMD_RES_QP_GET_RAW,

RDMA_NLDEV_CMD_RES_CQ_GET_RAW,

RDMA_NLDEV_CMD_RES_MR_GET_RAW,

RDMA_NLDEV_NUM_OPS
};

Expand Down Expand Up @@ -525,6 +531,8 @@ enum rdma_nldev_attr {
*/
RDMA_NLDEV_ATTR_DEV_DIM, /* u8 */

RDMA_NLDEV_ATTR_RES_RAW, /* binary */

/*
* Always the end
*/
Expand Down

0 comments on commit 6595952

Please sign in to comment.