Skip to content

Commit d26ab06

Browse files
sswenJames Bottomley
authored and
James Bottomley
committed
[SCSI] zfcp: receiving an unsolicited status can lead to I/O stall
Processing of an unsolicited status request can lead to a locking race on the request_queue's queue_lock during the recreation of the used-up status read request while still in the interrupt context of the response handler. Detaching the 'refill' of the long-running status read requests from the handler to a scheduled work item solves this issue. In addition, each refill run tries to re-establish the full number of status read requests, some of which might have failed in earlier runs. Signed-off-by: Swen Schillig <[email protected]> Signed-off-by: Christof Schmitt <[email protected]> Signed-off-by: James Bottomley <[email protected]>
1 parent 1542492 commit d26ab06

File tree

6 files changed

+32
-35
lines changed

6 files changed

+32
-35
lines changed

drivers/s390/scsi/zfcp_aux.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,27 @@ static void zfcp_dummy_release(struct device *dev)
970970
return;
971971
}
972972

973+
/*
 * Re-issue the status read requests that are currently counted as missing
 * for this adapter.
 *
 * While adapter->stat_miss is positive, zfcp_fsf_status_read() is called
 * with ZFCP_WAIT_FOR_SBAL; each call that returns zero decrements stat_miss,
 * and the first non-zero return ends the loop.
 *
 * Returns 1 (after requesting zfcp_erp_adapter_reopen()) if stat_miss is
 * still at or above ZFCP_STATUS_READS_RECOM once the loop has ended,
 * 0 otherwise.
 */
int zfcp_status_read_refill(struct zfcp_adapter *adapter)
974+
{
975+
while (atomic_read(&adapter->stat_miss) > 0)
976+
if (zfcp_fsf_status_read(adapter, ZFCP_WAIT_FOR_SBAL))
977+
break; /* request could not be set up; leave the rest as missing */
978+
else
979+
atomic_dec(&adapter->stat_miss);
980+
981+
if (ZFCP_STATUS_READS_RECOM <= atomic_read(&adapter->stat_miss)) {
982+
zfcp_erp_adapter_reopen(adapter, 0, 103, NULL);
983+
return 1;
984+
}
985+
return 0;
986+
}
987+
988+
/*
 * Work handler for the adapter's stat_work item: maps the work_struct back
 * to its enclosing struct zfcp_adapter via container_of() and runs
 * zfcp_status_read_refill() on it. The refill's return value is ignored.
 */
static void _zfcp_status_read_scheduler(struct work_struct *work)
989+
{
990+
zfcp_status_read_refill(container_of(work, struct zfcp_adapter,
991+
stat_work));
992+
}
993+
973994
/*
974995
* Enqueues an adapter at the end of the adapter list in the driver data.
975996
* All adapter internal structures are set up.
@@ -1063,6 +1084,7 @@ zfcp_adapter_enqueue(struct ccw_device *ccw_device)
10631084

10641085
/* initialize lock of associated request queue */
10651086
rwlock_init(&adapter->request_queue.queue_lock);
1087+
INIT_WORK(&adapter->stat_work, _zfcp_status_read_scheduler);
10661088

10671089
/* mark adapter unusable as long as sysfs registration is not complete */
10681090
atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE, &adapter->status);
@@ -1123,6 +1145,7 @@ zfcp_adapter_dequeue(struct zfcp_adapter *adapter)
11231145
int retval = 0;
11241146
unsigned long flags;
11251147

1148+
cancel_work_sync(&adapter->stat_work);
11261149
zfcp_adapter_scsi_unregister(adapter);
11271150
device_unregister(&adapter->generic_services);
11281151
zfcp_sysfs_adapter_remove_files(&adapter->ccw_device->dev);

drivers/s390/scsi/zfcp_dbf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ void zfcp_hba_dbf_event_fsf_unsol(const char *tag, struct zfcp_adapter *adapter,
268268
strncpy(rec->tag, "stat", ZFCP_DBF_TAG_SIZE);
269269
strncpy(rec->tag2, tag, ZFCP_DBF_TAG_SIZE);
270270

271-
rec->u.status.failed = adapter->status_read_failed;
271+
rec->u.status.failed = atomic_read(&adapter->stat_miss);
272272
if (status_buffer != NULL) {
273273
rec->u.status.status_type = status_buffer->status_type;
274274
rec->u.status.status_subtype = status_buffer->status_subtype;

drivers/s390/scsi/zfcp_def.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,6 @@ zfcp_address_to_sg(void *address, struct scatterlist *list, unsigned int size)
136136
#define ZFCP_QTCB_VERSION FSF_QTCB_CURRENT_VERSION
137137
/* ATTENTION: value must not be used by hardware */
138138
#define FSF_QTCB_UNSOLICITED_STATUS 0x6305
139-
#define ZFCP_STATUS_READ_FAILED_THRESHOLD 3
140139
#define ZFCP_STATUS_READS_RECOM FSF_STATUS_READS_RECOM
141140

142141
/* Do 1st retry in 1 second, then double the timeout for each following retry */
@@ -759,7 +758,8 @@ struct zfcp_adapter {
759758
rwlock_t abort_lock; /* Protects against SCSI
760759
stack abort/command
761760
completion races */
762-
u16 status_read_failed; /* # failed status reads */
761+
atomic_t stat_miss; /* # missing status reads*/
762+
struct work_struct stat_work;
763763
atomic_t status; /* status of this adapter */
764764
struct list_head erp_ready_head; /* error recovery for this
765765
adapter/devices */

drivers/s390/scsi/zfcp_erp.c

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2139,25 +2139,10 @@ static int
21392139
zfcp_erp_adapter_strategy_open_fsf_statusread(struct zfcp_erp_action
21402140
*erp_action)
21412141
{
2142-
int retval = ZFCP_ERP_SUCCEEDED;
2143-
int temp_ret;
21442142
struct zfcp_adapter *adapter = erp_action->adapter;
2145-
int i;
21462143

2147-
adapter->status_read_failed = 0;
2148-
for (i = 0; i < ZFCP_STATUS_READS_RECOM; i++) {
2149-
temp_ret = zfcp_fsf_status_read(adapter, ZFCP_WAIT_FOR_SBAL);
2150-
if (temp_ret < 0) {
2151-
ZFCP_LOG_INFO("error: set-up of unsolicited status "
2152-
"notification failed on adapter %s\n",
2153-
zfcp_get_busid_by_adapter(adapter));
2154-
retval = ZFCP_ERP_FAILED;
2155-
i--;
2156-
break;
2157-
}
2158-
}
2159-
2160-
return retval;
2144+
atomic_set(&adapter->stat_miss, 16);
2145+
return zfcp_status_read_refill(adapter);
21612146
}
21622147

21632148
/*

drivers/s390/scsi/zfcp_ext.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ extern void zfcp_fsf_start_timer(struct zfcp_fsf_req *, unsigned long);
9292
extern void zfcp_erp_start_timer(struct zfcp_fsf_req *);
9393
extern void zfcp_fsf_req_dismiss_all(struct zfcp_adapter *);
9494
extern int zfcp_fsf_status_read(struct zfcp_adapter *, int);
95+
extern int zfcp_status_read_refill(struct zfcp_adapter *adapter);
9596
extern int zfcp_fsf_req_create(struct zfcp_adapter *, u32, int, mempool_t *,
9697
unsigned long *, struct zfcp_fsf_req **);
9798
extern int zfcp_fsf_send_ct(struct zfcp_send_ct *, mempool_t *,

drivers/s390/scsi/zfcp_fsf.c

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,21 +1029,9 @@ zfcp_fsf_status_read_handler(struct zfcp_fsf_req *fsf_req)
10291029
* FIXME:
10301030
* allocation failure possible? (Is this code needed?)
10311031
*/
1032-
retval = zfcp_fsf_status_read(adapter, 0);
1033-
if (retval < 0) {
1034-
ZFCP_LOG_INFO("Failed to create unsolicited status read "
1035-
"request for the adapter %s.\n",
1036-
zfcp_get_busid_by_adapter(adapter));
1037-
/* temporary fix to avoid status read buffer shortage */
1038-
adapter->status_read_failed++;
1039-
if ((ZFCP_STATUS_READS_RECOM - adapter->status_read_failed)
1040-
< ZFCP_STATUS_READ_FAILED_THRESHOLD) {
1041-
ZFCP_LOG_INFO("restart adapter %s due to status read "
1042-
"buffer shortage\n",
1043-
zfcp_get_busid_by_adapter(adapter));
1044-
zfcp_erp_adapter_reopen(adapter, 0, 103, fsf_req);
1045-
}
1046-
}
1032+
1033+
atomic_inc(&adapter->stat_miss);
1034+
schedule_work(&adapter->stat_work);
10471035
out:
10481036
return retval;
10491037
}

0 commit comments

Comments
 (0)