
Commit 25511a4

workqueue: reimplement CPU online rebinding to handle idle workers
Currently, if any workers are left when a CPU is being brought back online, the trustee kills all idle workers and schedules rebind_work so that they re-bind to the CPU after the currently executing work item is finished. This works for busy workers because concurrency management doesn't try to wake them up from scheduler callbacks, which require the target task to be on the local run queue. The busy worker bumps the concurrency counter appropriately as it clears WORKER_UNBOUND from the rebind work item, and it is bound to the CPU before returning to the idle state.

To reduce CPU on/offlining overhead (many embedded systems use it for power saving) and simplify the code path, workqueue is planned to be modified to retain idle workers across CPU on/offlining. This patch reimplements CPU online rebinding such that it can also handle idle workers.

As noted earlier, rebinding idle workers is tricky due to the local wakeup requirement: all idle workers must be re-bound before scheduler callbacks are enabled. This is achieved by interlocking idle re-binding. Idle workers are requested to re-bind and then hold until all idle re-binding is complete, so that no bound worker starts executing work items prematurely. Only after all idle workers are re-bound and parked does CPU_ONLINE proceed to release them and queue the rebind work item to busy workers, guaranteeing that scheduler callbacks aren't invoked until all idle workers are ready.

worker_rebind_fn() is renamed to busy_worker_rebind_fn() and idle_worker_rebind() is added for idle workers. Rebinding logic is moved to rebind_workers(), which is now called from CPU_ONLINE after flushing the trustee. While at it, add a CPU sanity check in worker_thread().

Note that a worker may now become idle or become the manager between trustee release and rebinding during CPU_ONLINE. As the previous patch updated create_worker() so that it can be used by the regular manager while unbound, and this patch implements idle re-binding, this is safe.

This prepares for removal of the trustee and for keeping idle workers across CPU hotplugs.

Signed-off-by: Tejun Heo <[email protected]>
Acked-by: "Rafael J. Wysocki" <[email protected]>
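The interlocked idle re-binding described above is the subtle part of this change. Below is a minimal userspace sketch of the same two-way handshake: it is an illustration, not kernel code, with pthread mutex/condvars standing in for gcwq->lock, struct completion, and the rebind_hold wait queue, and the names (idle_rebind_cnt, rebind_hold, the REBIND flag) merely mirroring the patch. The per-pool loops and the retry path are omitted.

/*
 * Minimal userspace model of the rebind handshake (illustration only,
 * not the kernel implementation).  A "manager" thread plays the role of
 * rebind_workers(): it flags every idle worker for rebinding, waits until
 * all of them have reported in, then clears the flags and releases them.
 * Each worker plays idle_worker_rebind(): it reports completion and then
 * holds until released, so no worker can go busy while others are still
 * unbound.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_IDLE 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* counterpart of struct idle_rebind { cnt, done } */
static int idle_rebind_cnt;
static pthread_cond_t idle_rebind_done = PTHREAD_COND_INITIALIZER;

/* counterpart of WORKER_REBIND + gcwq->rebind_hold */
static bool worker_rebind_flag[NR_IDLE];
static pthread_cond_t rebind_hold = PTHREAD_COND_INITIALIZER;

static void *idle_worker(void *arg)
{
        int id = (int)(long)arg;

        pthread_mutex_lock(&lock);

        /* "re-bind" to the CPU and report back, as idle_worker_rebind() does */
        if (--idle_rebind_cnt == 0)
                pthread_cond_signal(&idle_rebind_done);  /* complete(&done) */

        /* park here until the manager clears our REBIND flag */
        while (worker_rebind_flag[id])
                pthread_cond_wait(&rebind_hold, &lock);

        pthread_mutex_unlock(&lock);
        printf("worker %d released, may become busy now\n", id);
        return NULL;
}

int main(void)
{
        pthread_t tid[NR_IDLE];
        int i;

        pthread_mutex_lock(&lock);
        idle_rebind_cnt = NR_IDLE;
        for (i = 0; i < NR_IDLE; i++) {
                worker_rebind_flag[i] = true;   /* morph UNBOUND to REBIND */
                pthread_create(&tid[i], NULL, idle_worker, (void *)(long)i);
        }

        /* wait until every idle worker has re-bound (idle_rebind.done) */
        while (idle_rebind_cnt)
                pthread_cond_wait(&idle_rebind_done, &lock);

        /* all idle workers are rebound and parked: clear REBIND, release them */
        for (i = 0; i < NR_IDLE; i++)
                worker_rebind_flag[i] = false;
        pthread_cond_broadcast(&rebind_hold);   /* wake_up_all(&rebind_hold) */
        pthread_mutex_unlock(&lock);

        for (i = 0; i < NR_IDLE; i++)
                pthread_join(tid[i], NULL);
        return 0;
}

In the actual patch the manager additionally has to retry, because a busy worker can become idle while gcwq->lock is released during the wait; the retry: loop in rebind_workers() in the diff below handles exactly that case.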
1 parent bc2ae0f commit 25511a4

1 file changed, 166 insertions(+), 49 deletions(-)

kernel/workqueue.c

Lines changed: 166 additions & 49 deletions
@@ -133,6 +133,7 @@ enum {
 
 struct global_cwq;
 struct worker_pool;
+struct idle_rebind;
 
 /*
  * The poor guys doing the actual heavy lifting.  All on-duty workers
@@ -154,7 +155,10 @@ struct worker {
         unsigned long           last_active;    /* L: last active timestamp */
         unsigned int            flags;          /* X: flags */
         int                     id;             /* I: worker id */
-        struct work_struct      rebind_work;    /* L: rebind worker to cpu */
+
+        /* for rebinding worker to CPU */
+        struct idle_rebind      *idle_rebind;   /* L: for idle worker */
+        struct work_struct      rebind_work;    /* L: for busy worker */
 };
 
 struct worker_pool {
@@ -190,6 +194,8 @@ struct global_cwq {
 
         struct worker_pool      pools[2];       /* normal and highpri pools */
 
+        wait_queue_head_t       rebind_hold;    /* rebind hold wait */
+
         struct task_struct      *trustee;       /* L: for gcwq shutdown */
         unsigned int            trustee_state;  /* L: trustee state */
         wait_queue_head_t       trustee_wait;   /* trustee wait */
@@ -1314,13 +1320,37 @@ __acquires(&gcwq->lock)
         }
 }
 
+struct idle_rebind {
+        int                     cnt;            /* # workers to be rebound */
+        struct completion       done;           /* all workers rebound */
+};
+
+/*
+ * Rebind an idle @worker to its CPU.  During CPU onlining, this has to
+ * happen synchronously for idle workers.  worker_thread() will test
+ * %WORKER_REBIND before leaving idle and call this function.
+ */
+static void idle_worker_rebind(struct worker *worker)
+{
+        struct global_cwq *gcwq = worker->pool->gcwq;
+
+        /* CPU must be online at this point */
+        WARN_ON(!worker_maybe_bind_and_lock(worker));
+        if (!--worker->idle_rebind->cnt)
+                complete(&worker->idle_rebind->done);
+        spin_unlock_irq(&worker->pool->gcwq->lock);
+
+        /* we did our part, wait for rebind_workers() to finish up */
+        wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
+}
+
 /*
- * Function for worker->rebind_work used to rebind unbound busy workers to
+ * Function for @worker->rebind.work used to rebind unbound busy workers to
  * the associated cpu which is coming back online.  This is scheduled by
  * cpu up but can race with other cpu hotplug operations and may be
  * executed twice without intervening cpu down.
  */
-static void worker_rebind_fn(struct work_struct *work)
+static void busy_worker_rebind_fn(struct work_struct *work)
 {
         struct worker *worker = container_of(work, struct worker, rebind_work);
         struct global_cwq *gcwq = worker->pool->gcwq;
@@ -1331,6 +1361,112 @@ static void worker_rebind_fn(struct work_struct *work)
         spin_unlock_irq(&gcwq->lock);
 }
 
+/**
+ * rebind_workers - rebind all workers of a gcwq to the associated CPU
+ * @gcwq: gcwq of interest
+ *
+ * @gcwq->cpu is coming online.  Rebind all workers to the CPU.  Rebinding
+ * is different for idle and busy ones.
+ *
+ * The idle ones should be rebound synchronously and idle rebinding should
+ * be complete before any worker starts executing work items with
+ * concurrency management enabled; otherwise, scheduler may oops trying to
+ * wake up non-local idle worker from wq_worker_sleeping().
+ *
+ * This is achieved by repeatedly requesting rebinding until all idle
+ * workers are known to have been rebound under @gcwq->lock and holding all
+ * idle workers from becoming busy until idle rebinding is complete.
+ *
+ * Once idle workers are rebound, busy workers can be rebound as they
+ * finish executing their current work items.  Queueing the rebind work at
+ * the head of their scheduled lists is enough.  Note that nr_running will
+ * be properly bumped as busy workers rebind.
+ *
+ * On return, all workers are guaranteed to either be bound or have rebind
+ * work item scheduled.
+ */
+static void rebind_workers(struct global_cwq *gcwq)
+        __releases(&gcwq->lock) __acquires(&gcwq->lock)
+{
+        struct idle_rebind idle_rebind;
+        struct worker_pool *pool;
+        struct worker *worker;
+        struct hlist_node *pos;
+        int i;
+
+        lockdep_assert_held(&gcwq->lock);
+
+        for_each_worker_pool(pool, gcwq)
+                lockdep_assert_held(&pool->manager_mutex);
+
+        /*
+         * Rebind idle workers.  Interlocked both ways.  We wait for
+         * workers to rebind via @idle_rebind.done.  Workers will wait for
+         * us to finish up by watching %WORKER_REBIND.
+         */
+        init_completion(&idle_rebind.done);
+retry:
+        idle_rebind.cnt = 1;
+        INIT_COMPLETION(idle_rebind.done);
+
+        /* set REBIND and kick idle ones, we'll wait for these later */
+        for_each_worker_pool(pool, gcwq) {
+                list_for_each_entry(worker, &pool->idle_list, entry) {
+                        if (worker->flags & WORKER_REBIND)
+                                continue;
+
+                        /* morph UNBOUND to REBIND */
+                        worker->flags &= ~WORKER_UNBOUND;
+                        worker->flags |= WORKER_REBIND;
+
+                        idle_rebind.cnt++;
+                        worker->idle_rebind = &idle_rebind;
+
+                        /* worker_thread() will call idle_worker_rebind() */
+                        wake_up_process(worker->task);
+                }
+        }
+
+        if (--idle_rebind.cnt) {
+                spin_unlock_irq(&gcwq->lock);
+                wait_for_completion(&idle_rebind.done);
+                spin_lock_irq(&gcwq->lock);
+                /* busy ones might have become idle while waiting, retry */
+                goto retry;
+        }
+
+        /*
+         * All idle workers are rebound and waiting for %WORKER_REBIND to
+         * be cleared inside idle_worker_rebind().  Clear and release.
+         * Clearing %WORKER_REBIND from this foreign context is safe
+         * because these workers are still guaranteed to be idle.
+         */
+        for_each_worker_pool(pool, gcwq)
+                list_for_each_entry(worker, &pool->idle_list, entry)
+                        worker->flags &= ~WORKER_REBIND;
+
+        wake_up_all(&gcwq->rebind_hold);
+
+        /* rebind busy workers */
+        for_each_busy_worker(worker, i, pos, gcwq) {
+                struct work_struct *rebind_work = &worker->rebind_work;
+
+                /* morph UNBOUND to REBIND */
+                worker->flags &= ~WORKER_UNBOUND;
+                worker->flags |= WORKER_REBIND;
+
+                if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
+                                     work_data_bits(rebind_work)))
+                        continue;
+
+                /* wq doesn't matter, use the default one */
+                debug_work_activate(rebind_work);
+                insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
+                            worker->scheduled.next,
+                            work_color_to_flags(WORK_NO_COLOR));
+        }
+}
+
 static struct worker *alloc_worker(void)
 {
         struct worker *worker;
@@ -1339,7 +1475,7 @@ static struct worker *alloc_worker(void)
         if (worker) {
                 INIT_LIST_HEAD(&worker->entry);
                 INIT_LIST_HEAD(&worker->scheduled);
-                INIT_WORK(&worker->rebind_work, worker_rebind_fn);
+                INIT_WORK(&worker->rebind_work, busy_worker_rebind_fn);
                 /* on creation a worker is in !idle && prep state */
                 worker->flags = WORKER_PREP;
         }
@@ -1829,6 +1965,9 @@ __acquires(&gcwq->lock)
 
         lockdep_copy_map(&lockdep_map, &work->lockdep_map);
 #endif
+        WARN_ON_ONCE(!(worker->flags & (WORKER_UNBOUND | WORKER_REBIND)) &&
+                     raw_smp_processor_id() != gcwq->cpu);
+
         /*
          * A single work shouldn't be executed concurrently by
          * multiple workers on a single cpu.  Check whether anyone is
@@ -1946,11 +2085,20 @@ static int worker_thread(void *__worker)
 woke_up:
         spin_lock_irq(&gcwq->lock);
 
-        /* DIE can be set only while we're idle, checking here is enough */
-        if (worker->flags & WORKER_DIE) {
+        /*
+         * DIE can be set only while idle and REBIND set while busy has
+         * @worker->rebind_work scheduled.  Checking here is enough.
+         */
+        if (unlikely(worker->flags & (WORKER_REBIND | WORKER_DIE))) {
                 spin_unlock_irq(&gcwq->lock);
-                worker->task->flags &= ~PF_WQ_WORKER;
-                return 0;
+
+                if (worker->flags & WORKER_DIE) {
+                        worker->task->flags &= ~PF_WQ_WORKER;
+                        return 0;
+                }
+
+                idle_worker_rebind(worker);
+                goto woke_up;
         }
 
         worker_leave_idle(worker);
@@ -3468,42 +3616,6 @@ static int __cpuinit trustee_thread(void *__gcwq)
                 }
         } while (i && rc >= 0);
 
-        /*
-         * At this point, either draining has completed and no worker
-         * is left, or cpu down has been canceled or the cpu is being
-         * brought back up.  There shouldn't be any idle one left.
-         * Tell the remaining busy ones to rebind once it finishes the
-         * currently scheduled works by scheduling the rebind_work.
-         */
-        for_each_worker_pool(pool, gcwq)
-                WARN_ON(!list_empty(&pool->idle_list));
-
-        /* if we're reassociating, clear DISASSOCIATED */
-        if (gcwq->trustee_state == TRUSTEE_RELEASE)
-                gcwq->flags &= ~GCWQ_DISASSOCIATED;
-
-        for_each_busy_worker(worker, i, pos, gcwq) {
-                struct work_struct *rebind_work = &worker->rebind_work;
-
-                /*
-                 * Rebind_work may race with future cpu hotplug
-                 * operations.  Use a separate flag to mark that
-                 * rebinding is scheduled.
-                 */
-                worker->flags |= WORKER_REBIND;
-                worker->flags &= ~WORKER_UNBOUND;
-
-                /* queue rebind_work, wq doesn't matter, use the default one */
-                if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
-                                     work_data_bits(rebind_work)))
-                        continue;
-
-                debug_work_activate(rebind_work);
-                insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
-                            worker->scheduled.next,
-                            work_color_to_flags(WORK_NO_COLOR));
-        }
-
         gcwq_release_management(gcwq);
 
         /* notify completion */
@@ -3609,13 +3721,16 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
                         wait_trustee_state(gcwq, TRUSTEE_DONE);
                 }
 
-                /*
-                 * Either DISASSOCIATED is already cleared or no worker is
-                 * left on the gcwq.  Safe to clear DISASSOCIATED without
-                 * claiming managers.
-                 */
+                spin_unlock_irq(&gcwq->lock);
+                gcwq_claim_management(gcwq);
+                spin_lock_irq(&gcwq->lock);
+
                 gcwq->flags &= ~GCWQ_DISASSOCIATED;
 
+                rebind_workers(gcwq);
+
+                gcwq_release_management(gcwq);
+
                 /*
                  * Trustee is done and there might be no worker left.
                  * Put the first_idle in and request a real manager to
@@ -3910,6 +4025,8 @@ static int __init init_workqueues(void)
                         ida_init(&pool->worker_ida);
                 }
 
+                init_waitqueue_head(&gcwq->rebind_hold);
+
                 gcwq->trustee_state = TRUSTEE_DONE;
                 init_waitqueue_head(&gcwq->trustee_wait);
         }
