Skip to content

Commit 958772a

Browse files
dolcinismb49
authored and committed
net: phy: Fix race condition on link status change
BugLink: https://bugs.launchpad.net/bugs/1980407 commit 91a7cda upstream. This fixes the following error caused by a race condition between phydev->adjust_link() and a MDIO transaction in the phy interrupt handler. The issue was reproduced with the ethernet FEC driver and a micrel KSZ9031 phy. [ 146.195696] fec 2188000.ethernet eth0: MDIO read timeout [ 146.201779] ------------[ cut here ]------------ [ 146.206671] WARNING: CPU: 0 PID: 571 at drivers/net/phy/phy.c:942 phy_error+0x24/0x6c [ 146.214744] Modules linked in: bnep imx_vdoa imx_sdma evbug [ 146.220640] CPU: 0 PID: 571 Comm: irq/128-2188000 Not tainted 5.18.0-rc3-00080-gd569e86915b7 #9 [ 146.229563] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 146.236257] unwind_backtrace from show_stack+0x10/0x14 [ 146.241640] show_stack from dump_stack_lvl+0x58/0x70 [ 146.246841] dump_stack_lvl from __warn+0xb4/0x24c [ 146.251772] __warn from warn_slowpath_fmt+0x5c/0xd4 [ 146.256873] warn_slowpath_fmt from phy_error+0x24/0x6c [ 146.262249] phy_error from kszphy_handle_interrupt+0x40/0x48 [ 146.268159] kszphy_handle_interrupt from irq_thread_fn+0x1c/0x78 [ 146.274417] irq_thread_fn from irq_thread+0xf0/0x1dc [ 146.279605] irq_thread from kthread+0xe4/0x104 [ 146.284267] kthread from ret_from_fork+0x14/0x28 [ 146.289164] Exception stack(0xe6fa1fb0 to 0xe6fa1ff8) [ 146.294448] 1fa0: 00000000 00000000 00000000 00000000 [ 146.302842] 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 146.311281] 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 146.318262] irq event stamp: 12325 [ 146.321780] hardirqs last enabled at (12333): [<c01984c4>] __up_console_sem+0x50/0x60 [ 146.330013] hardirqs last disabled at (12342): [<c01984b0>] __up_console_sem+0x3c/0x60 [ 146.338259] softirqs last enabled at (12324): [<c01017f0>] __do_softirq+0x2c0/0x624 [ 146.346311] softirqs last disabled at (12319): [<c01300ac>] __irq_exit_rcu+0x138/0x178 [ 146.354447] ---[ end trace 0000000000000000 
]--- With the FEC driver, phydev->adjust_link() calls fec_enet_adjust_link(), which calls fec_stop()/fec_restart(); both of these functions reset and temporarily disable the FEC, disrupting any MII transaction that could be happening at the same time. fec_enet_adjust_link() and phy_read() can be running at the same time when we have one additional interrupt before the phy_state_machine() is able to terminate. Thread 1 (phylib WQ) | Thread 2 (phy interrupt) | | phy_interrupt() <-- PHY IRQ | handle_interrupt() | phy_read() | phy_trigger_machine() | --> schedule phylib WQ | | phy_state_machine() | phy_check_link_status() | phy_link_change() | phydev->adjust_link() | fec_enet_adjust_link() | --> FEC reset | phy_interrupt() <-- PHY IRQ | phy_read() | Fix this by acquiring the phydev lock in phy_interrupt(). Link: https://lore.kernel.org/all/[email protected]/ Fixes: c974bdb ("net: phy: Use threaded IRQ, to allow IRQ from sleeping devices") cc: <[email protected]> Signed-off-by: Francesco Dolcini <[email protected]> Reviewed-by: Andrew Lunn <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]> [fd: backport: adapt locking before did_interrupt()/ack_interrupt() callbacks removal ] Signed-off-by: Francesco Dolcini <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]> Signed-off-by: Kamal Mostafa <[email protected]> Signed-off-by: Stefan Bader <[email protected]>
1 parent 8f68d8c commit 958772a

File tree

1 file changed

+40
-5
lines changed

1 file changed

+40
-5
lines changed

drivers/net/phy/phy.c

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,15 @@ EXPORT_SYMBOL(phy_print_status);
116116
*/
117117
static int phy_clear_interrupt(struct phy_device *phydev)
118118
{
119-
if (phydev->drv->ack_interrupt)
120-
return phydev->drv->ack_interrupt(phydev);
119+
int ret = 0;
121120

122-
return 0;
121+
if (phydev->drv->ack_interrupt) {
122+
mutex_lock(&phydev->lock);
123+
ret = phydev->drv->ack_interrupt(phydev);
124+
mutex_unlock(&phydev->lock);
125+
}
126+
127+
return ret;
123128
}
124129

125130
/**
@@ -760,6 +765,36 @@ static int phy_disable_interrupts(struct phy_device *phydev)
760765
return phy_clear_interrupt(phydev);
761766
}
762767

768+
/**
769+
* phy_did_interrupt - Checks if the PHY generated an interrupt
770+
* @phydev: target phy_device struct
771+
*/
772+
static int phy_did_interrupt(struct phy_device *phydev)
773+
{
774+
int ret;
775+
776+
mutex_lock(&phydev->lock);
777+
ret = phydev->drv->did_interrupt(phydev);
778+
mutex_unlock(&phydev->lock);
779+
780+
return ret;
781+
}
782+
783+
/**
784+
* phy_handle_interrupt - PHY specific interrupt handler
785+
* @phydev: target phy_device struct
786+
*/
787+
static int phy_handle_interrupt(struct phy_device *phydev)
788+
{
789+
int ret;
790+
791+
mutex_lock(&phydev->lock);
792+
ret = phydev->drv->handle_interrupt(phydev);
793+
mutex_unlock(&phydev->lock);
794+
795+
return ret;
796+
}
797+
763798
/**
764799
* phy_interrupt - PHY interrupt handler
765800
* @irq: interrupt line
@@ -771,11 +806,11 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
771806
{
772807
struct phy_device *phydev = phy_dat;
773808

774-
if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev))
809+
if (phydev->drv->did_interrupt && !phy_did_interrupt(phydev))
775810
return IRQ_NONE;
776811

777812
if (phydev->drv->handle_interrupt) {
778-
if (phydev->drv->handle_interrupt(phydev))
813+
if (phy_handle_interrupt(phydev))
779814
goto phy_err;
780815
} else {
781816
/* reschedule state queue work to run as soon as possible */

0 commit comments

Comments
 (0)