Skip to content

Commit 924dd58

Browse files
Martin PeschkeJames Bottomley
Martin Peschke
authored and
James Bottomley
committed
[SCSI] zfcp: fix schedule-inside-lock in scsi_device list loops
BUG: sleeping function called from invalid context at kernel/workqueue.c:2752 in_atomic(): 1, irqs_disabled(): 1, pid: 360, name: zfcperp0.0.1700 CPU: 1 Not tainted 3.9.3+ #69 Process zfcperp0.0.1700 (pid: 360, task: 0000000075b7e080, ksp: 000000007476bc30) <snip> Call Trace: ([<00000000001165de>] show_trace+0x106/0x154) [<00000000001166a0>] show_stack+0x74/0xf4 [<00000000006ff646>] dump_stack+0xc6/0xd4 [<000000000017f3a0>] __might_sleep+0x128/0x148 [<000000000015ece8>] flush_work+0x54/0x1f8 [<00000000001630de>] __cancel_work_timer+0xc6/0x128 [<00000000005067ac>] scsi_device_dev_release_usercontext+0x164/0x23c [<0000000000161816>] execute_in_process_context+0x96/0xa8 [<00000000004d33d8>] device_release+0x60/0xc0 [<000000000048af48>] kobject_release+0xa8/0x1c4 [<00000000004f4bf2>] __scsi_iterate_devices+0xfa/0x130 [<000003ff801b307a>] zfcp_erp_strategy+0x4da/0x1014 [zfcp] [<000003ff801b3caa>] zfcp_erp_thread+0xf6/0x2b0 [zfcp] [<000000000016b75a>] kthread+0xf2/0xfc [<000000000070c9de>] kernel_thread_starter+0x6/0xc [<000000000070c9d8>] kernel_thread_starter+0x0/0xc Apparently, the ref_count for some scsi_device drops down to zero, triggering device removal through execute_in_process_context(), while the lldd error recovery thread iterates through a scsi device list. Unfortunately, execute_in_process_context() decides to immediately execute that device removal function, instead of scheduling asynchronous execution, since it detects process context and thinks it is safe to do so. But almost all calls to shost_for_each_device() in our lldd are inside spin_lock_irq, even in thread context. Obviously, schedule() inside spin_lock_irq sections is a bad idea. Change the lldd to use the proper iterator function, __shost_for_each_device(), in combination with required locking. Occurrences that need to be changed include all calls in zfcp_erp.c, since those might be executed in zfcp error recovery thread context with a lock held. 
Other occurrences of shost_for_each_device() in zfcp_fsf.c do not need to be changed (no process context, no surrounding locking). The problem was introduced in Linux 2.6.37 by commit b62a8d9 "[SCSI] zfcp: Use SCSI device data zfcp_scsi_dev instead of zfcp_unit". Reported-by: Christian Borntraeger <[email protected]> Signed-off-by: Martin Peschke <[email protected]> Cc: [email protected] #2.6.37+ Signed-off-by: Steffen Maier <[email protected]> Signed-off-by: James Bottomley <[email protected]>
1 parent d79ff14 commit 924dd58

File tree

1 file changed

+22
-7
lines changed

1 file changed

+22
-7
lines changed

drivers/s390/scsi/zfcp_erp.c

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,13 @@ static void zfcp_erp_action_dismiss_port(struct zfcp_port *port)
102102

103103
if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_INUSE)
104104
zfcp_erp_action_dismiss(&port->erp_action);
105-
else
106-
shost_for_each_device(sdev, port->adapter->scsi_host)
105+
else {
106+
spin_lock(port->adapter->scsi_host->host_lock);
107+
__shost_for_each_device(sdev, port->adapter->scsi_host)
107108
if (sdev_to_zfcp(sdev)->port == port)
108109
zfcp_erp_action_dismiss_lun(sdev);
110+
spin_unlock(port->adapter->scsi_host->host_lock);
111+
}
109112
}
110113

111114
static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
@@ -592,9 +595,11 @@ static void _zfcp_erp_lun_reopen_all(struct zfcp_port *port, int clear,
592595
{
593596
struct scsi_device *sdev;
594597

595-
shost_for_each_device(sdev, port->adapter->scsi_host)
598+
spin_lock(port->adapter->scsi_host->host_lock);
599+
__shost_for_each_device(sdev, port->adapter->scsi_host)
596600
if (sdev_to_zfcp(sdev)->port == port)
597601
_zfcp_erp_lun_reopen(sdev, clear, id, 0);
602+
spin_unlock(port->adapter->scsi_host->host_lock);
598603
}
599604

600605
static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act)
@@ -1434,8 +1439,10 @@ void zfcp_erp_set_adapter_status(struct zfcp_adapter *adapter, u32 mask)
14341439
atomic_set_mask(common_mask, &port->status);
14351440
read_unlock_irqrestore(&adapter->port_list_lock, flags);
14361441

1437-
shost_for_each_device(sdev, adapter->scsi_host)
1442+
spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
1443+
__shost_for_each_device(sdev, adapter->scsi_host)
14381444
atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status);
1445+
spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
14391446
}
14401447

14411448
/**
@@ -1469,11 +1476,13 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask)
14691476
}
14701477
read_unlock_irqrestore(&adapter->port_list_lock, flags);
14711478

1472-
shost_for_each_device(sdev, adapter->scsi_host) {
1479+
spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
1480+
__shost_for_each_device(sdev, adapter->scsi_host) {
14731481
atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status);
14741482
if (clear_counter)
14751483
atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
14761484
}
1485+
spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
14771486
}
14781487

14791488
/**
@@ -1487,16 +1496,19 @@ void zfcp_erp_set_port_status(struct zfcp_port *port, u32 mask)
14871496
{
14881497
struct scsi_device *sdev;
14891498
u32 common_mask = mask & ZFCP_COMMON_FLAGS;
1499+
unsigned long flags;
14901500

14911501
atomic_set_mask(mask, &port->status);
14921502

14931503
if (!common_mask)
14941504
return;
14951505

1496-
shost_for_each_device(sdev, port->adapter->scsi_host)
1506+
spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
1507+
__shost_for_each_device(sdev, port->adapter->scsi_host)
14971508
if (sdev_to_zfcp(sdev)->port == port)
14981509
atomic_set_mask(common_mask,
14991510
&sdev_to_zfcp(sdev)->status);
1511+
spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
15001512
}
15011513

15021514
/**
@@ -1511,6 +1523,7 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
15111523
struct scsi_device *sdev;
15121524
u32 common_mask = mask & ZFCP_COMMON_FLAGS;
15131525
u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED;
1526+
unsigned long flags;
15141527

15151528
atomic_clear_mask(mask, &port->status);
15161529

@@ -1520,13 +1533,15 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
15201533
if (clear_counter)
15211534
atomic_set(&port->erp_counter, 0);
15221535

1523-
shost_for_each_device(sdev, port->adapter->scsi_host)
1536+
spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
1537+
__shost_for_each_device(sdev, port->adapter->scsi_host)
15241538
if (sdev_to_zfcp(sdev)->port == port) {
15251539
atomic_clear_mask(common_mask,
15261540
&sdev_to_zfcp(sdev)->status);
15271541
if (clear_counter)
15281542
atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
15291543
}
1544+
spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
15301545
}
15311546

15321547
/**

0 commit comments

Comments
 (0)