Skip to content

Commit

Permalink
net/mlx5e: Support dump callback in TX reporter
Browse files Browse the repository at this point in the history
Add support for dumping an SQ's FW state on TX reporter events. Use the
Resource dump API to retrieve the relevant data: SX slice, SQ dump and SQ
buffer. Wrap it in formatted messages and store the binary output in devlink core.

Example:
$ devlink health dump show pci/0000:00:0b.0 reporter tx
SX Slice:
   data:
     00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
     22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
     00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
     ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
     00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
     22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
     00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
     ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
     00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff
  SQs:
    SQ:
      index: 1511
      data:
        00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
        00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff
    SQ:
      index: 1516
      data:
        00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
        00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff

$ devlink health dump show pci/0000:00:0b.0 reporter tx -jp
{
    "SX Slice": {
    	"data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255],
    	},
    "SQs": [ {
            "SQ": {
                "index": 1511,
                "data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255]
            }
        },{
            "SQ": {
                "index": 1516,
                "data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255]
            }
        } ]
}

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
  • Loading branch information
ayalevin123 authored and Saeed Mahameed committed Feb 19, 2020
1 parent 0a56be3 commit 5f29458
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 2 deletions.
105 changes: 105 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en/health.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "health.h"
#include "lib/eq.h"
#include "lib/mlx5.h"

int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
Expand Down Expand Up @@ -204,3 +205,107 @@ int mlx5e_health_report(struct mlx5e_priv *priv,

return devlink_health_report(reporter, err_str, err_ctx);
}

/* Largest chunk handed to devlink_fmsg_binary_put() in a single call. */
#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024

/*
 * Append @value_len bytes starting at @value to @fmsg as binary data,
 * split into chunks of at most MLX5_HEALTH_DEVLINK_MAX_SIZE bytes.
 *
 * Returns 0 on success (including when @value_len is 0), or the first
 * non-zero value returned by devlink_fmsg_binary_put().
 */
static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
					const void *value, u32 value_len)
{
	u32 data_size;
	u32 offset;
	/* Must start at 0: with value_len == 0 the loop body never runs. */
	int err = 0;

	for (offset = 0; offset < value_len; offset += data_size) {
		/* Remaining bytes, clamped to the per-call maximum. */
		data_size = value_len - offset;
		if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
			data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
		err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
		if (err)
			break;
	}
	return err;
}

/*
 * Dump the resource described by @key into @fmsg as a binary pair
 * named "data".
 *
 * A single page is allocated as a bounce buffer; mlx5_rsc_dump_next() is
 * called repeatedly to fill it and each filled segment is appended to
 * @fmsg. The loop keeps going for as long as mlx5_rsc_dump_next() returns
 * a positive value and stops on 0 or on a negative error.
 *
 * Returns -EOPNOTSUPP when mdev->rsc_dump is NULL or an error pointer,
 * -ENOMEM when the page cannot be allocated, otherwise 0 or the error
 * reported by the resource-dump / devlink fmsg calls. An error from
 * closing the binary pair takes precedence over an earlier error.
 */
int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
			       struct devlink_fmsg *fmsg)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_rsc_dump_cmd *cmd;
	struct page *page;
	int cmd_err, err;
	int end_err;
	int size;

	if (IS_ERR_OR_NULL(mdev->rsc_dump))
		return -EOPNOTSUPP;

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
	if (err)
		goto free_page; /* the page is already allocated; do not leak it */

	cmd = mlx5_rsc_dump_cmd_create(mdev, key);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto free_page;
	}

	do {
		cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
		if (cmd_err < 0) {
			err = cmd_err;
			goto destroy_cmd;
		}

		err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
		if (err)
			goto destroy_cmd;

	} while (cmd_err > 0);

destroy_cmd:
	mlx5_rsc_dump_cmd_destroy(cmd);
	/* Always try to close the nest that was opened above. */
	end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
	if (end_err)
		err = end_err;
free_page:
	__free_page(page);
	return err;
}

int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
int queue_idx, char *lbl)
{
struct mlx5_rsc_key key = {};
int err;

key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
key.index1 = queue_idx;
key.size = PAGE_SIZE;
key.num_of_obj1 = 1;

err = devlink_fmsg_obj_nest_start(fmsg);
if (err)
return err;

err = mlx5e_reporter_named_obj_nest_start(fmsg, lbl);
if (err)
return err;

err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
if (err)
return err;

err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
return err;

err = mlx5e_reporter_named_obj_nest_end(fmsg);
if (err)
return err;

return devlink_fmsg_obj_nest_end(fmsg);
}
8 changes: 6 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/en/health.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#define __MLX5E_EN_HEALTH_H

#include "en.h"
#include "diag/rsc_dump.h"

#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)

Expand Down Expand Up @@ -36,6 +37,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);

/*
 * Error context that reporters build on the stack and hand to
 * mlx5e_health_report() together with the error string.
 */
struct mlx5e_err_ctx {
/* Recovery callback for the reported error (presumably invoked with @ctx — confirm against the reporter's recover op). */
int (*recover)(void *ctx);
/* Dump callback; the TX reporter calls it with the priv, the fmsg to fill and @ctx. */
int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx);
/* Opaque argument for the callbacks; the TX reporters store the failing SQ here. */
void *ctx;
};

Expand All @@ -48,6 +50,8 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
int mlx5e_health_create_reporters(struct mlx5e_priv *priv);
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
void mlx5e_health_channels_update(struct mlx5e_priv *priv);


int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
struct devlink_fmsg *fmsg);
int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
int queue_idx, char *lbl);
#endif
123 changes: 123 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,126 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
return err;
}

static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
void *ctx)
{
struct mlx5_rsc_key key = {};
struct mlx5e_txqsq *sq = ctx;
int err;

if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return 0;

err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice");
if (err)
return err;

key.size = PAGE_SIZE;
key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
return err;

err = mlx5e_reporter_named_obj_nest_end(fmsg);
if (err)
return err;

err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ");
if (err)
return err;

err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC");
if (err)
return err;

key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
key.index1 = sq->sqn;
key.num_of_obj1 = 1;

err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
return err;

err = mlx5e_reporter_named_obj_nest_end(fmsg);
if (err)
return err;

err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff");
if (err)
return err;

key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
return err;

err = mlx5e_reporter_named_obj_nest_end(fmsg);
if (err)
return err;

return mlx5e_reporter_named_obj_nest_end(fmsg);
}

static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
struct devlink_fmsg *fmsg)
{
struct mlx5_rsc_key key = {};
int i, tc, err;

if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return 0;

err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice");
if (err)
return err;

key.size = PAGE_SIZE;
key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
if (err)
return err;

err = mlx5e_reporter_named_obj_nest_end(fmsg);
if (err)
return err;

err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
if (err)
return err;

for (i = 0; i < priv->channels.num; i++) {
struct mlx5e_channel *c = priv->channels.c[i];

for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
struct mlx5e_txqsq *sq = &c->sq[tc];

err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
if (err)
return err;
}
}
return devlink_fmsg_arr_pair_nest_end(fmsg);
}

/*
 * Run the dump callback stored in @err_ctx, passing it the context
 * pointer stored alongside it, and return its result.
 */
static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
					   struct mlx5e_err_ctx *err_ctx,
					   struct devlink_fmsg *fmsg)
{
	void *dump_arg = err_ctx->ctx;

	return err_ctx->dump(priv, fmsg, dump_arg);
}

/*
 * devlink health .dump op for the TX reporter.
 *
 * With a non-NULL @context (a struct mlx5e_err_ctx) the dump is delegated
 * to the callback carried by that context; with a NULL @context all SQs
 * are dumped.
 */
static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	if (!err_ctx)
		return mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);

	return mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg);
}

void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
{
struct mlx5e_priv *priv = sq->channel->priv;
Expand All @@ -254,6 +374,7 @@ void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)

err_ctx.ctx = sq;
err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
err_ctx.dump = mlx5e_tx_reporter_dump_sq;
sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);

mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
Expand All @@ -267,6 +388,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)

err_ctx.ctx = sq;
err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
err_ctx.dump = mlx5e_tx_reporter_dump_sq;
sprintf(err_str,
"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
Expand All @@ -279,6 +401,7 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
.name = "tx",
.recover = mlx5e_tx_reporter_recover,
.diagnose = mlx5e_tx_reporter_diagnose,
.dump = mlx5e_tx_reporter_dump,
};

#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
Expand Down

0 comments on commit 5f29458

Please sign in to comment.