net/mlx5: Change ownership model for lag
Lag is used to combine two PCI functions of the same HCA into a single
logical unit. This is core functionality and as such should be managed
by the core driver. Currently this isn't the case: while the lag
software structure is stored inside the lower device, its lifetime
(creation / destruction) is dictated by the mlx5e part. Change the
ownership model so that lag is tied to the lifetime of the lower-level
core driver rather than to the mlx5e part.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
mark-bloch authored and Saeed Mahameed committed Jun 15, 2021
1 parent 8ed1947 commit 8a66e45
Showing 7 changed files with 154 additions and 88 deletions.
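
As a reader's sketch of the new split (an editorial illustration, not part of the commit): the core driver now owns the lag object for the lifetime of the PCI function, while mlx5e only attaches and detaches its netdev. The mlx5_load()/mlx5_unload() call sites below are an assumption; the main.c hunks are among the four changed files not shown in this excerpt.

/* Sketch only; the core-driver call sites are assumed, not shown in this diff. */

/* Core driver: the lag object's lifetime follows the PCI function. */
static int mlx5_load(struct mlx5_core_dev *dev)
{
	mlx5_lag_add_mdev(dev);		/* allocate the shared lag object or take a reference */
	return 0;
}

static void mlx5_unload(struct mlx5_core_dev *dev)
{
	mlx5_lag_remove_mdev(dev);	/* drop the reference; the last PF frees the object */
}

/* mlx5e: only publishes and unpublishes its netdev. */
static void mlx5e_nic_enable(struct mlx5e_priv *priv)
{
	mlx5_lag_add_netdev(priv->mdev, priv->netdev);		/* was mlx5_lag_add() */
}

static void mlx5e_nic_disable(struct mlx5e_priv *priv)
{
	mlx5_lag_remove_netdev(priv->mdev, priv->netdev);	/* was mlx5_lag_remove() */
}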
4 changes: 2 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5114,7 +5114,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 	mlx5e_set_netdev_mtu_boundaries(priv);
 	mlx5e_set_dev_port_mtu(priv);
 
-	mlx5_lag_add(mdev, netdev);
+	mlx5_lag_add_netdev(mdev, netdev);
 
 	mlx5e_enable_async_events(priv);
 	mlx5e_enable_blocking_events(priv);
@@ -5162,7 +5162,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 		priv->en_trap = NULL;
 	}
 	mlx5e_disable_async_events(priv);
-	mlx5_lag_remove(mdev);
+	mlx5_lag_remove_netdev(mdev, priv->netdev);
 	mlx5_vxlan_reset_to_default(mdev->vxlan);
 }

4 changes: 2 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -976,7 +976,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 	if (MLX5_CAP_GEN(mdev, uplink_follow))
 		mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK,
 					      0, 0, MLX5_VPORT_ADMIN_STATE_AUTO);
-	mlx5_lag_add(mdev, netdev);
+	mlx5_lag_add_netdev(mdev, netdev);
 	priv->events_nb.notifier_call = uplink_rep_async_event;
 	mlx5_notifier_register(mdev, &priv->events_nb);
 	mlx5e_dcbnl_initialize(priv);
@@ -1009,7 +1009,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
 	mlx5e_dcbnl_delete_app(priv);
 	mlx5_notifier_unregister(mdev, &priv->events_nb);
 	mlx5e_rep_tc_disable(priv);
-	mlx5_lag_remove(mdev);
+	mlx5_lag_remove_netdev(mdev, priv->netdev);
 }
 
 static MLX5E_DEFINE_STATS_GRP(sw_rep, 0);
221 changes: 141 additions & 80 deletions drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -93,6 +93,64 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
 
+static int mlx5_lag_netdev_event(struct notifier_block *this,
+				 unsigned long event, void *ptr);
+static void mlx5_do_bond_work(struct work_struct *work);
+
+static void mlx5_ldev_free(struct kref *ref)
+{
+	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
+
+	if (ldev->nb.notifier_call)
+		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
+	mlx5_lag_mp_cleanup(ldev);
+	cancel_delayed_work_sync(&ldev->bond_work);
+	destroy_workqueue(ldev->wq);
+	kfree(ldev);
+}
+
+static void mlx5_ldev_put(struct mlx5_lag *ldev)
+{
+	kref_put(&ldev->ref, mlx5_ldev_free);
+}
+
+static void mlx5_ldev_get(struct mlx5_lag *ldev)
+{
+	kref_get(&ldev->ref);
+}
+
+static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	int err;
+
+	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+	if (!ldev)
+		return NULL;
+
+	ldev->wq = create_singlethread_workqueue("mlx5_lag");
+	if (!ldev->wq) {
+		kfree(ldev);
+		return NULL;
+	}
+
+	kref_init(&ldev->ref);
+	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+
+	ldev->nb.notifier_call = mlx5_lag_netdev_event;
+	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
+		ldev->nb.notifier_call = NULL;
+		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
+	}
+
+	err = mlx5_lag_mp_init(ldev);
+	if (err)
+		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
+			      err);
+
+	return ldev;
+}
+
 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
 				struct net_device *ndev)
 {
@@ -511,55 +569,52 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
-static struct mlx5_lag *mlx5_lag_dev_alloc(void)
+static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
+				 struct mlx5_core_dev *dev,
+				 struct net_device *netdev)
 {
-	struct mlx5_lag *ldev;
-
-	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
-	if (!ldev)
-		return NULL;
-
-	ldev->wq = create_singlethread_workqueue("mlx5_lag");
-	if (!ldev->wq) {
-		kfree(ldev);
-		return NULL;
-	}
+	unsigned int fn = PCI_FUNC(dev->pdev->devfn);
 
-	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+	if (fn >= MLX5_MAX_PORTS)
+		return;
 
-	return ldev;
+	spin_lock(&lag_lock);
+	ldev->pf[fn].netdev = netdev;
+	ldev->tracker.netdev_state[fn].link_up = 0;
+	ldev->tracker.netdev_state[fn].tx_enabled = 0;
+	spin_unlock(&lag_lock);
 }
 
-static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
+static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
+				    struct net_device *netdev)
 {
-	destroy_workqueue(ldev->wq);
-	kfree(ldev);
+	int i;
+
+	spin_lock(&lag_lock);
+	for (i = 0; i < MLX5_MAX_PORTS; i++) {
+		if (ldev->pf[i].netdev == netdev) {
+			ldev->pf[i].netdev = NULL;
+			break;
+		}
+	}
+	spin_unlock(&lag_lock);
 }
 
-static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
-			       struct mlx5_core_dev *dev,
-			       struct net_device *netdev)
+static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
+			       struct mlx5_core_dev *dev)
 {
 	unsigned int fn = PCI_FUNC(dev->pdev->devfn);
 
 	if (fn >= MLX5_MAX_PORTS)
-		return -EPERM;
-
-	spin_lock(&lag_lock);
-	ldev->pf[fn].dev = dev;
-	ldev->pf[fn].netdev = netdev;
-	ldev->tracker.netdev_state[fn].link_up = 0;
-	ldev->tracker.netdev_state[fn].tx_enabled = 0;
+		return;
 
+	ldev->pf[fn].dev = dev;
 	dev->priv.lag = ldev;
-
-	spin_unlock(&lag_lock);
-
-	return fn;
 }
 
-static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
-				   struct mlx5_core_dev *dev)
+/* Must be called with intf_mutex held */
+static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
+				  struct mlx5_core_dev *dev)
 {
 	int i;
 
@@ -570,19 +625,15 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
 	if (i == MLX5_MAX_PORTS)
 		return;
 
-	spin_lock(&lag_lock);
-	memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
-
+	ldev->pf[i].dev = NULL;
 	dev->priv.lag = NULL;
-	spin_unlock(&lag_lock);
 }
 
-/* Must be called with intf_mutex held */
-void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
+static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
 {
 	struct mlx5_lag *ldev = NULL;
 	struct mlx5_core_dev *tmp_dev;
-	int i, err;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
 	    !MLX5_CAP_GEN(dev, lag_master) ||
@@ -594,67 +645,77 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
 		ldev = tmp_dev->priv.lag;
 
 	if (!ldev) {
-		ldev = mlx5_lag_dev_alloc();
+		ldev = mlx5_lag_dev_alloc(dev);
 		if (!ldev) {
 			mlx5_core_err(dev, "Failed to alloc lag dev\n");
 			return;
 		}
+	} else {
+		mlx5_ldev_get(ldev);
 	}
 
-	if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0)
-		return;
+	mlx5_ldev_add_mdev(ldev, dev);
 
-	for (i = 0; i < MLX5_MAX_PORTS; i++)
-		if (!ldev->pf[i].dev)
-			break;
+	return;
+}
 
-	if (i >= MLX5_MAX_PORTS)
-		ldev->flags |= MLX5_LAG_FLAG_READY;
+void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
 
-	if (!ldev->nb.notifier_call) {
-		ldev->nb.notifier_call = mlx5_lag_netdev_event;
-		if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
-			ldev->nb.notifier_call = NULL;
-			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
-		}
-	}
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev)
+		return;
 
-	err = mlx5_lag_mp_init(ldev);
-	if (err)
-		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
-			      err);
+	mlx5_dev_list_lock();
+	mlx5_ldev_remove_mdev(ldev, dev);
+	mlx5_dev_list_unlock();
+	mlx5_ldev_put(ldev);
+}
+
+void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
+{
+	mlx5_dev_list_lock();
+	__mlx5_lag_dev_add_mdev(dev);
+	mlx5_dev_list_unlock();
 }
 
 /* Must be called with intf_mutex held */
-void mlx5_lag_remove(struct mlx5_core_dev *dev)
+void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
+			    struct net_device *netdev)
 {
 	struct mlx5_lag *ldev;
-	int i;
 
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	if (!ldev)
 		return;
 
 	if (__mlx5_lag_is_active(ldev))
 		mlx5_disable_lag(ldev);
 
-	mlx5_lag_dev_remove_pf(ldev, dev);
-
+	mlx5_ldev_remove_netdev(ldev, netdev);
 	ldev->flags &= ~MLX5_LAG_FLAG_READY;
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
+			 struct net_device *netdev)
+{
+	struct mlx5_lag *ldev;
+	int i;
+
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev)
+		return;
+
+	mlx5_ldev_add_netdev(ldev, dev, netdev);
 
 	for (i = 0; i < MLX5_MAX_PORTS; i++)
-		if (ldev->pf[i].dev)
+		if (!ldev->pf[i].dev)
 			break;
 
-	if (i == MLX5_MAX_PORTS) {
-		if (ldev->nb.notifier_call) {
-			unregister_netdevice_notifier_net(&init_net, &ldev->nb);
-			ldev->nb.notifier_call = NULL;
-		}
-		mlx5_lag_mp_cleanup(ldev);
-		cancel_delayed_work_sync(&ldev->bond_work);
-		mlx5_lag_dev_free(ldev);
-	}
+	if (i >= MLX5_MAX_PORTS)
+		ldev->flags |= MLX5_LAG_FLAG_READY;
 }
 
 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
@@ -663,7 +724,7 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
 	bool res;
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	res = ldev && __mlx5_lag_is_roce(ldev);
 	spin_unlock(&lag_lock);
 
@@ -677,7 +738,7 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 	bool res;
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	res = ldev && __mlx5_lag_is_active(ldev);
 	spin_unlock(&lag_lock);
 
@@ -691,7 +752,7 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
 	bool res;
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	res = ldev && __mlx5_lag_is_sriov(ldev);
 	spin_unlock(&lag_lock);
 
@@ -704,7 +765,7 @@ void mlx5_lag_update(struct mlx5_core_dev *dev)
 	struct mlx5_lag *ldev;
 
 	mlx5_dev_list_lock();
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	if (!ldev)
 		goto unlock;
 
@@ -720,7 +781,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
 	struct mlx5_lag *ldev;
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 
 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
 		goto unlock;
@@ -749,7 +810,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
 	u8 port = 0;
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
 		goto unlock;
 
@@ -785,7 +846,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 	memset(values, 0, sizeof(*values) * num_counters);
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	if (ldev && __mlx5_lag_is_active(ldev)) {
 		num_ports = MLX5_MAX_PORTS;
 		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
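Editorial note on the ownership mechanism: each PCI function that registers its mdev holds a reference on the shared lag object (mlx5_ldev_get()), and mlx5_ldev_free() runs only when mlx5_ldev_put() drops the last reference, so whichever function unloads last tears the object down. Below is a minimal, runnable userspace analog of that kref pattern, a sketch with hypothetical names (the kernel code above uses kref_init()/kref_get()/kref_put()), not the driver's actual implementation:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Userspace analog of the kref-based lag ownership; all names are hypothetical. */
struct ldev {
	atomic_int ref;
};

static struct ldev *ldev_alloc(void)
{
	struct ldev *l = calloc(1, sizeof(*l));

	if (l)
		atomic_init(&l->ref, 1);	/* like kref_init(): first owner, ref = 1 */
	return l;
}

static void ldev_get(struct ldev *l)
{
	atomic_fetch_add(&l->ref, 1);		/* like kref_get(): another PF shares it */
}

static void ldev_put(struct ldev *l)
{
	/* like kref_put(): the last owner to leave frees the shared object */
	if (atomic_fetch_sub(&l->ref, 1) == 1) {
		printf("last reference dropped, freeing\n");
		free(l);
	}
}

int main(void)
{
	struct ldev *shared = ldev_alloc();	/* PF0 probes: allocates, ref = 1 */

	ldev_get(shared);			/* PF1 probes: shares it, ref = 2 */
	ldev_put(shared);			/* PF1 removes, ref = 1 */
	ldev_put(shared);			/* PF0 removes: ref hits 0, freed */
	return 0;
}

With this model the lag object survives mlx5e attach/detach cycles, since those now only set and clear netdev pointers rather than creating or destroying the structure.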
