Skip to content

Commit

Permalink
igb: fix deadlock caused by taking RTNL in RPM resume path
Browse files Browse the repository at this point in the history
[ Upstream commit ac8c58f ]

Recent net core changes caused an issue with few Intel drivers
(reportedly igb), where taking RTNL in RPM resume path results in a
deadlock. See [0] for a bug report. I don't think the core changes
are wrong, but taking RTNL in RPM resume path isn't needed.
The Intel drivers are the only ones doing this. See [1] for a
discussion on the issue. Following patch changes the RPM resume path
to not take RTNL.

[0] https://bugzilla.kernel.org/show_bug.cgi?id=215129
[1] https://lore.kernel.org/netdev/20211125074949.5f897431@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com/t/

Fixes: bd86924 ("net: core: try to runtime-resume detached device in __dev_open")
Fixes: f32a213 ("ethtool: runtime-resume netdev parent before ethtool ioctl ops")
Tested-by: Martin Stolpe <martin.stolpe@gmail.com>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Link: https://lore.kernel.org/r/20211220201844.2714498-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
hkallweit authored and gregkh committed Dec 29, 2021
1 parent e00eace commit 61e6b82
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions drivers/net/ethernet/intel/igb/igb_main.c
Expand Up @@ -9260,7 +9260,7 @@ static int __maybe_unused igb_suspend(struct device *dev)
return __igb_shutdown(to_pci_dev(dev), NULL, 0);
}

static int __maybe_unused igb_resume(struct device *dev)
static int __maybe_unused __igb_resume(struct device *dev, bool rpm)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct net_device *netdev = pci_get_drvdata(pdev);
Expand Down Expand Up @@ -9303,17 +9303,24 @@ static int __maybe_unused igb_resume(struct device *dev)

wr32(E1000_WUS, ~0);

rtnl_lock();
if (!rpm)
rtnl_lock();
if (!err && netif_running(netdev))
err = __igb_open(netdev, true);

if (!err)
netif_device_attach(netdev);
rtnl_unlock();
if (!rpm)
rtnl_unlock();

return err;
}

static int __maybe_unused igb_resume(struct device *dev)
{
return __igb_resume(dev, false);
}

static int __maybe_unused igb_runtime_idle(struct device *dev)
{
struct net_device *netdev = dev_get_drvdata(dev);
Expand All @@ -9332,7 +9339,7 @@ static int __maybe_unused igb_runtime_suspend(struct device *dev)

static int __maybe_unused igb_runtime_resume(struct device *dev)
{
return igb_resume(dev);
return __igb_resume(dev, true);
}

static void igb_shutdown(struct pci_dev *pdev)
Expand Down Expand Up @@ -9448,7 +9455,7 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
* @pdev: Pointer to PCI device
*
* Restart the card from scratch, as if from a cold-boot. Implementation
* resembles the first-half of the igb_resume routine.
* resembles the first-half of the __igb_resume routine.
**/
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
Expand Down Expand Up @@ -9488,7 +9495,7 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
*
* This callback is called when the error recovery driver tells us that
* its OK to resume normal operation. Implementation resembles the
* second-half of the igb_resume routine.
* second-half of the __igb_resume routine.
*/
static void igb_io_resume(struct pci_dev *pdev)
{
Expand Down

0 comments on commit 61e6b82

Please sign in to comment.