Skip to content

Commit

Permalink
net/mlx5: Avoid double clear or set of sync reset requested
Browse files Browse the repository at this point in the history
commit fc3d3db upstream.

Double clear of reset requested state can lead to NULL pointer as it
will try to delete the timer twice. This can happen for example on a
race between abort from FW and pci error or reset. Avoid such case using
test_and_clear_bit() to verify only one time reset requested state clear
flow. Similarly use test_and_set_bit() to verify only one time reset
requested state set flow.

Fixes: 7dd6df3 ("net/mlx5: Handle sync reset abort event")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Maher Sanalla <msanalla@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  • Loading branch information
mosheshemesh2 authored and gregkh committed May 12, 2022
1 parent 01ce31c commit bcb60cf
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
Expand Up @@ -135,14 +135,19 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
del_timer_sync(&fw_reset->timer);
}

static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;

if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
mlx5_core_warn(dev, "Reset request was already cleared\n");
return -EALREADY;
}

mlx5_stop_sync_reset_poll(dev);
clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
if (poll_health)
mlx5_start_health_poll(dev);
return 0;
}

#define MLX5_RESET_POLL_INTERVAL (HZ / 10)
Expand Down Expand Up @@ -186,13 +191,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
}

static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;

if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
mlx5_core_warn(dev, "Reset request was already set\n");
return -EALREADY;
}
mlx5_stop_health_poll(dev, true);
set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
mlx5_start_sync_reset_poll(dev);
return 0;
}

static void mlx5_fw_live_patch_event(struct work_struct *work)
Expand Down Expand Up @@ -221,7 +230,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
err ? "Failed" : "Sent");
return;
}
mlx5_sync_reset_set_reset_requested(dev);
if (mlx5_sync_reset_set_reset_requested(dev))
return;

err = mlx5_fw_reset_set_reset_sync_ack(dev);
if (err)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
Expand Down Expand Up @@ -319,7 +330,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
struct mlx5_core_dev *dev = fw_reset->dev;
int err;

mlx5_sync_reset_clear_reset_requested(dev, false);
if (mlx5_sync_reset_clear_reset_requested(dev, false))
return;

mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");

Expand Down Expand Up @@ -348,10 +360,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
reset_abort_work);
struct mlx5_core_dev *dev = fw_reset->dev;

if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
if (mlx5_sync_reset_clear_reset_requested(dev, true))
return;

mlx5_sync_reset_clear_reset_requested(dev, true);
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
}

Expand Down

0 comments on commit bcb60cf

Please sign in to comment.