@@ -133,6 +133,7 @@ enum {
 
 struct global_cwq;
 struct worker_pool;
+struct idle_rebind;
 
 /*
  * The poor guys doing the actual heavy lifting.  All on-duty workers
@@ -154,7 +155,10 @@ struct worker {
 	unsigned long		last_active;	/* L: last active timestamp */
 	unsigned int		flags;		/* X: flags */
 	int			id;		/* I: worker id */
-	struct work_struct	rebind_work;	/* L: rebind worker to cpu */
+
+	/* for rebinding worker to CPU */
+	struct idle_rebind	*idle_rebind;	/* L: for idle worker */
+	struct work_struct	rebind_work;	/* L: for busy worker */
 };
 
 struct worker_pool {
@@ -190,6 +194,8 @@ struct global_cwq {
 
 	struct worker_pool	pools[2];	/* normal and highpri pools */
 
+	wait_queue_head_t	rebind_hold;	/* rebind hold wait */
+
 	struct task_struct	*trustee;	/* L: for gcwq shutdown */
 	unsigned int		trustee_state;	/* L: trustee state */
 	wait_queue_head_t	trustee_wait;	/* trustee wait */
@@ -1314,13 +1320,37 @@ __acquires(&gcwq->lock)
 	}
 }
 
+struct idle_rebind {
+	int			cnt;		/* # workers to be rebound */
+	struct completion	done;		/* all workers rebound */
+};
+
+/*
+ * Rebind an idle @worker to its CPU.  During CPU onlining, this has to
+ * happen synchronously for idle workers.  worker_thread() will test
+ * %WORKER_REBIND before leaving idle and call this function.
+ */
+static void idle_worker_rebind(struct worker *worker)
+{
+	struct global_cwq *gcwq = worker->pool->gcwq;
+
+	/* CPU must be online at this point */
+	WARN_ON(!worker_maybe_bind_and_lock(worker));
+	if (!--worker->idle_rebind->cnt)
+		complete(&worker->idle_rebind->done);
+	spin_unlock_irq(&worker->pool->gcwq->lock);
+
+	/* we did our part, wait for rebind_workers() to finish up */
+	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
+}
+
 /*
- * Function for worker->rebind_work used to rebind unbound busy workers to
+ * Function for @worker->rebind_work used to rebind unbound busy workers to
  * the associated cpu which is coming back online.  This is scheduled by
  * cpu up but can race with other cpu hotplug operations and may be
  * executed twice without intervening cpu down.
  */
-static void worker_rebind_fn(struct work_struct *work)
+static void busy_worker_rebind_fn(struct work_struct *work)
 {
 	struct worker *worker = container_of(work, struct worker, rebind_work);
 	struct global_cwq *gcwq = worker->pool->gcwq;
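The counting trick behind struct idle_rebind above is easier to see in isolation. Below is a standalone userspace sketch, not part of the patch: the name idle_rebind_demo and the pthread mutex/condvar standing in for the kernel's struct completion are invented for illustration. It shows why cnt is biased by one for the initiator: each kicked worker drops one reference, the drop to zero signals completion, and the initiator, after dropping its own bias, only has to wait if some workers have not acknowledged yet.

/*
 * Userspace analogue of struct idle_rebind's biased counter (illustrative
 * only); pthread mutex/cond stand in for the kernel's completion API.
 */
#include <pthread.h>
#include <stdio.h>

struct idle_rebind_demo {
	int cnt;			/* # workers still to ack, plus one initiator bias */
	pthread_mutex_t lock;
	pthread_cond_t done;		/* signalled when cnt drops to 0 */
};

static struct idle_rebind_demo demo = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.done = PTHREAD_COND_INITIALIZER,
};

/* worker side: mirrors the !--worker->idle_rebind->cnt check */
static void *worker_fn(void *arg)
{
	pthread_mutex_lock(&demo.lock);
	if (!--demo.cnt)
		pthread_cond_signal(&demo.done);
	pthread_mutex_unlock(&demo.lock);
	return NULL;
}

int main(void)
{
	pthread_t workers[4];
	int i;

	pthread_mutex_lock(&demo.lock);
	demo.cnt = 1;				/* initiator's bias, as in rebind_workers() */
	for (i = 0; i < 4; i++) {
		demo.cnt++;			/* one reference per kicked worker */
		pthread_create(&workers[i], NULL, worker_fn, NULL);
	}

	/* drop the bias; wait only if some workers have not acked yet */
	if (--demo.cnt)
		while (demo.cnt)
			pthread_cond_wait(&demo.done, &demo.lock);
	pthread_mutex_unlock(&demo.lock);

	printf("all workers acked\n");
	for (i = 0; i < 4; i++)
		pthread_join(workers[i], NULL);
	return 0;
}

rebind_workers() in the next hunk applies the same pattern with gcwq->lock as the mutex and wait_for_completion() as the wait.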
@@ -1331,6 +1361,112 @@ static void worker_rebind_fn(struct work_struct *work)
 	spin_unlock_irq(&gcwq->lock);
 }
 
+/**
+ * rebind_workers - rebind all workers of a gcwq to the associated CPU
+ * @gcwq: gcwq of interest
+ *
+ * @gcwq->cpu is coming online.  Rebind all workers to the CPU.  Rebinding
+ * is different for idle and busy ones.
+ *
+ * The idle ones should be rebound synchronously and idle rebinding should
+ * be complete before any worker starts executing work items with
+ * concurrency management enabled; otherwise, scheduler may oops trying to
+ * wake up non-local idle worker from wq_worker_sleeping().
+ *
+ * This is achieved by repeatedly requesting rebinding until all idle
+ * workers are known to have been rebound under @gcwq->lock and holding all
+ * idle workers from becoming busy until idle rebinding is complete.
+ *
+ * Once idle workers are rebound, busy workers can be rebound as they
+ * finish executing their current work items.  Queueing the rebind work at
+ * the head of their scheduled lists is enough.  Note that nr_running will
+ * be properly bumped as busy workers rebind.
+ *
+ * On return, all workers are guaranteed to either be bound or have rebind
+ * work item scheduled.
+ */
+static void rebind_workers(struct global_cwq *gcwq)
+	__releases(&gcwq->lock) __acquires(&gcwq->lock)
+{
+	struct idle_rebind idle_rebind;
+	struct worker_pool *pool;
+	struct worker *worker;
+	struct hlist_node *pos;
+	int i;
+
+	lockdep_assert_held(&gcwq->lock);
+
+	for_each_worker_pool(pool, gcwq)
+		lockdep_assert_held(&pool->manager_mutex);
+
+	/*
+	 * Rebind idle workers.  Interlocked both ways.  We wait for
+	 * workers to rebind via @idle_rebind.done.  Workers will wait for
+	 * us to finish up by watching %WORKER_REBIND.
+	 */
+	init_completion(&idle_rebind.done);
+retry:
+	idle_rebind.cnt = 1;
+	INIT_COMPLETION(idle_rebind.done);
+
+	/* set REBIND and kick idle ones, we'll wait for these later */
+	for_each_worker_pool(pool, gcwq) {
+		list_for_each_entry(worker, &pool->idle_list, entry) {
+			if (worker->flags & WORKER_REBIND)
+				continue;
+
+			/* morph UNBOUND to REBIND */
+			worker->flags &= ~WORKER_UNBOUND;
+			worker->flags |= WORKER_REBIND;
+
+			idle_rebind.cnt++;
+			worker->idle_rebind = &idle_rebind;
+
+			/* worker_thread() will call idle_worker_rebind() */
+			wake_up_process(worker->task);
+		}
+	}
+
+	if (--idle_rebind.cnt) {
+		spin_unlock_irq(&gcwq->lock);
+		wait_for_completion(&idle_rebind.done);
+		spin_lock_irq(&gcwq->lock);
+		/* busy ones might have become idle while waiting, retry */
+		goto retry;
+	}
+
+	/*
+	 * All idle workers are rebound and waiting for %WORKER_REBIND to
+	 * be cleared inside idle_worker_rebind().  Clear and release.
+	 * Clearing %WORKER_REBIND from this foreign context is safe
+	 * because these workers are still guaranteed to be idle.
+	 */
+	for_each_worker_pool(pool, gcwq)
+		list_for_each_entry(worker, &pool->idle_list, entry)
+			worker->flags &= ~WORKER_REBIND;
+
+	wake_up_all(&gcwq->rebind_hold);
+
+	/* rebind busy workers */
+	for_each_busy_worker(worker, i, pos, gcwq) {
+		struct work_struct *rebind_work = &worker->rebind_work;
+
+		/* morph UNBOUND to REBIND */
+		worker->flags &= ~WORKER_UNBOUND;
+		worker->flags |= WORKER_REBIND;
+
+		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
+				     work_data_bits(rebind_work)))
+			continue;
+
+		/* wq doesn't matter, use the default one */
+		debug_work_activate(rebind_work);
+		insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
+			    worker->scheduled.next,
+			    work_color_to_flags(WORK_NO_COLOR));
+	}
+}
+
 static struct worker *alloc_worker(void)
 {
 	struct worker *worker;
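The other half of the interlock described in the kernel-doc above is the hold: idle workers that have acknowledged via idle_rebind.done must not become busy until rebind_workers() has cleared %WORKER_REBIND for all of them, after which a single wake_up_all(&gcwq->rebind_hold) releases them together. The following standalone userspace sketch is again not part of the patch; rebind_pending and the pthread condvar are invented stand-ins for WORKER_REBIND and wait_event()/wake_up_all(), and only this hold/release gate is modelled.

/*
 * Userspace analogue of the gcwq->rebind_hold gate (illustrative only):
 * workers block until the manager clears their flag and broadcasts, the
 * way idle workers wait at the end of idle_worker_rebind().
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_WORKERS 4

static bool rebind_pending[NR_WORKERS];		/* stands in for WORKER_REBIND */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t rebind_hold = PTHREAD_COND_INITIALIZER;

/* worker side: "we did our part, wait for the manager to finish up" */
static void *idle_worker(void *arg)
{
	int id = *(int *)arg;

	pthread_mutex_lock(&lock);
	while (rebind_pending[id])
		pthread_cond_wait(&rebind_hold, &lock);
	pthread_mutex_unlock(&lock);

	printf("worker %d released\n", id);
	return NULL;
}

int main(void)
{
	pthread_t tid[NR_WORKERS];
	int ids[NR_WORKERS];
	int i;

	for (i = 0; i < NR_WORKERS; i++) {
		ids[i] = i;
		rebind_pending[i] = true;	/* set "REBIND" before kicking */
		pthread_create(&tid[i], NULL, idle_worker, &ids[i]);
	}

	/* manager side: clear the flag for everyone, then release all at once */
	pthread_mutex_lock(&lock);
	for (i = 0; i < NR_WORKERS; i++)
		rebind_pending[i] = false;
	pthread_cond_broadcast(&rebind_hold);	/* analogue of wake_up_all() */
	pthread_mutex_unlock(&lock);

	for (i = 0; i < NR_WORKERS; i++)
		pthread_join(tid[i], NULL);
	return 0;
}

Clearing every flag before the single broadcast mirrors why it is safe for rebind_workers() to clear %WORKER_REBIND from a foreign context: the held workers are still idle and cannot race into becoming busy.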
@@ -1339,7 +1475,7 @@ static struct worker *alloc_worker(void)
 	if (worker) {
 		INIT_LIST_HEAD(&worker->entry);
 		INIT_LIST_HEAD(&worker->scheduled);
-		INIT_WORK(&worker->rebind_work, worker_rebind_fn);
+		INIT_WORK(&worker->rebind_work, busy_worker_rebind_fn);
 		/* on creation a worker is in !idle && prep state */
 		worker->flags = WORKER_PREP;
 	}
@@ -1829,6 +1965,9 @@ __acquires(&gcwq->lock)
 
 	lockdep_copy_map(&lockdep_map, &work->lockdep_map);
 #endif
+	WARN_ON_ONCE(!(worker->flags & (WORKER_UNBOUND | WORKER_REBIND)) &&
+		     raw_smp_processor_id() != gcwq->cpu);
+
 	/*
 	 * A single work shouldn't be executed concurrently by
 	 * multiple workers on a single cpu.  Check whether anyone is
@@ -1946,11 +2085,20 @@ static int worker_thread(void *__worker)
 woke_up:
 	spin_lock_irq(&gcwq->lock);
 
-	/* DIE can be set only while we're idle, checking here is enough */
-	if (worker->flags & WORKER_DIE) {
+	/*
+	 * DIE can be set only while idle and REBIND set while busy has
+	 * @worker->rebind_work scheduled.  Checking here is enough.
+	 */
+	if (unlikely(worker->flags & (WORKER_REBIND | WORKER_DIE))) {
 		spin_unlock_irq(&gcwq->lock);
-		worker->task->flags &= ~PF_WQ_WORKER;
-		return 0;
+
+		if (worker->flags & WORKER_DIE) {
+			worker->task->flags &= ~PF_WQ_WORKER;
+			return 0;
+		}
+
+		idle_worker_rebind(worker);
+		goto woke_up;
 	}
 
 	worker_leave_idle(worker);
@@ -3468,42 +3616,6 @@ static int __cpuinit trustee_thread(void *__gcwq)
 		}
 	} while (i && rc >= 0);
 
-	/*
-	 * At this point, either draining has completed and no worker
-	 * is left, or cpu down has been canceled or the cpu is being
-	 * brought back up.  There shouldn't be any idle one left.
-	 * Tell the remaining busy ones to rebind once it finishes the
-	 * currently scheduled works by scheduling the rebind_work.
-	 */
-	for_each_worker_pool(pool, gcwq)
-		WARN_ON(!list_empty(&pool->idle_list));
-
-	/* if we're reassociating, clear DISASSOCIATED */
-	if (gcwq->trustee_state == TRUSTEE_RELEASE)
-		gcwq->flags &= ~GCWQ_DISASSOCIATED;
-
-	for_each_busy_worker(worker, i, pos, gcwq) {
-		struct work_struct *rebind_work = &worker->rebind_work;
-
-		/*
-		 * Rebind_work may race with future cpu hotplug
-		 * operations.  Use a separate flag to mark that
-		 * rebinding is scheduled.
-		 */
-		worker->flags |= WORKER_REBIND;
-		worker->flags &= ~WORKER_UNBOUND;
-
-		/* queue rebind_work, wq doesn't matter, use the default one */
-		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
-				     work_data_bits(rebind_work)))
-			continue;
-
-		debug_work_activate(rebind_work);
-		insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
-			    worker->scheduled.next,
-			    work_color_to_flags(WORK_NO_COLOR));
-	}
-
 	gcwq_release_management(gcwq);
 
 	/* notify completion */
@@ -3609,13 +3721,16 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 		wait_trustee_state(gcwq, TRUSTEE_DONE);
 	}
 
-	/*
-	 * Either DISASSOCIATED is already cleared or no worker is
-	 * left on the gcwq.  Safe to clear DISASSOCIATED without
-	 * claiming managers.
-	 */
+	spin_unlock_irq(&gcwq->lock);
+	gcwq_claim_management(gcwq);
+	spin_lock_irq(&gcwq->lock);
+
 	gcwq->flags &= ~GCWQ_DISASSOCIATED;
 
+	rebind_workers(gcwq);
+
+	gcwq_release_management(gcwq);
+
 	/*
 	 * Trustee is done and there might be no worker left.
 	 * Put the first_idle in and request a real manager to
@@ -3910,6 +4025,8 @@ static int __init init_workqueues(void)
 		ida_init(&pool->worker_ida);
 	}
 
+	init_waitqueue_head(&gcwq->rebind_hold);
+
 	gcwq->trustee_state = TRUSTEE_DONE;
 	init_waitqueue_head(&gcwq->trustee_wait);
 }