Commit 6d25be5

KAGA-KOKO authored and Ingo Molnar committed
sched/core, workqueues: Distangle worker accounting from rq lock
The worker accounting for CPU-bound workers is plugged into the core scheduler code and the wakeup code. This is not a hard requirement and can be avoided by keeping track of the state in the workqueue code itself.

Keep track of the sleeping state in the worker itself and call the notifier before entering the core scheduler. There might be false positives when the task is woken between that call and actually scheduling, but that's not really different from scheduling and being woken immediately after switching away. The nr_running update now happens when the task returns from schedule(), which is later compared to when it was done from ttwu().

[ bigeasy: preempt_disable() around wq_worker_sleeping() by Daniel Bristot de Oliveira ]

Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Tejun Heo <[email protected]>
Cc: Daniel Bristot de Oliveira <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/ad2b29b5715f970bffc1a7026cabd6ff0b24076a.1532952814.git.bristot@redhat.com
Signed-off-by: Ingo Molnar <[email protected]>
1 parent e2abb39 commit 6d25be5
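For orientation, here is a small user-space sketch of the accounting scheme the patch moves into the workqueue code: the worker records its own sleeping state, the pool's nr_running counter is adjusted in the sleeping/running hooks, and an idle worker is woken when the last running one blocks while work is still pending. The names used here (model_pool, model_worker_sleeping(), model_worker_running(), and so on) are illustrative stand-ins, not kernel APIs; the real hooks are wq_worker_sleeping() and wq_worker_running() in the diffs below.

/*
 * Illustrative user-space model only -- NOT kernel code. It mirrors the
 * idea of the patch: the worker tracks its own sleeping state and the
 * pool wakes an idle worker when the last running one goes to sleep
 * while work is still pending.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct model_pool {
	atomic_int nr_running;	/* concurrency-managed workers currently running */
	bool work_pending;	/* stand-in for !list_empty(&pool->worklist) */
};

struct model_worker {
	struct model_pool *pool;
	int sleeping;		/* set on sleep, cleared on return from schedule() */
};

/* Counterpart of wq_worker_sleeping(): called before entering the scheduler. */
static void model_worker_sleeping(struct model_worker *w)
{
	if (w->sleeping)	/* already accounted as sleeping */
		return;
	w->sleeping = 1;
	/* last running worker going to sleep with work pending -> wake someone */
	if (atomic_fetch_sub(&w->pool->nr_running, 1) == 1 && w->pool->work_pending)
		printf("wake an idle worker to keep the pool busy\n");
}

/* Counterpart of wq_worker_running(): called after returning from schedule(). */
static void model_worker_running(struct model_worker *w)
{
	if (!w->sleeping)	/* e.g. woken before it ever slept: nothing to undo */
		return;
	atomic_fetch_add(&w->pool->nr_running, 1);
	w->sleeping = 0;
}

int main(void)
{
	struct model_pool pool = { .work_pending = true };
	struct model_worker worker = { .pool = &pool };

	atomic_store(&pool.nr_running, 1);	/* one worker currently running */
	model_worker_sleeping(&worker);		/* worker blocks */
	model_worker_running(&worker);		/* worker resumes */
	printf("nr_running = %d\n", atomic_load(&pool.nr_running));
	return 0;
}

As in the patch, the key property is that the notification happens in the worker's own context before and after schedule(), so no rq-lock coupling is needed; a wakeup that lands between the sleeping hook and the actual context switch only costs a spurious concurrency check.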

File tree

3 files changed, +48 -99 lines changed

kernel/sched/core.c

Lines changed: 21 additions & 67 deletions
@@ -1685,10 +1685,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl
 {
 	activate_task(rq, p, en_flags);
 	p->on_rq = TASK_ON_RQ_QUEUED;
-
-	/* If a worker is waking up, notify the workqueue: */
-	if (p->flags & PF_WQ_WORKER)
-		wq_worker_waking_up(p, cpu_of(rq));
 }
 
 /*
@@ -2106,56 +2102,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	return success;
 }
 
-/**
- * try_to_wake_up_local - try to wake up a local task with rq lock held
- * @p: the thread to be awakened
- * @rf: request-queue flags for pinning
- *
- * Put @p on the run-queue if it's not already there. The caller must
- * ensure that this_rq() is locked, @p is bound to this_rq() and not
- * the current task.
- */
-static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
-{
-	struct rq *rq = task_rq(p);
-
-	if (WARN_ON_ONCE(rq != this_rq()) ||
-	    WARN_ON_ONCE(p == current))
-		return;
-
-	lockdep_assert_held(&rq->lock);
-
-	if (!raw_spin_trylock(&p->pi_lock)) {
-		/*
-		 * This is OK, because current is on_cpu, which avoids it being
-		 * picked for load-balance and preemption/IRQs are still
-		 * disabled avoiding further scheduler activity on it and we've
-		 * not yet picked a replacement task.
-		 */
-		rq_unlock(rq, rf);
-		raw_spin_lock(&p->pi_lock);
-		rq_relock(rq, rf);
-	}
-
-	if (!(p->state & TASK_NORMAL))
-		goto out;
-
-	trace_sched_waking(p);
-
-	if (!task_on_rq_queued(p)) {
-		if (p->in_iowait) {
-			delayacct_blkio_end(p);
-			atomic_dec(&rq->nr_iowait);
-		}
-		ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK);
-	}
-
-	ttwu_do_wakeup(rq, p, 0, rf);
-	ttwu_stat(p, smp_processor_id(), 0);
-out:
-	raw_spin_unlock(&p->pi_lock);
-}
-
 /**
  * wake_up_process - Wake up a specific process
  * @p: The process to be woken up.
@@ -3472,19 +3418,6 @@ static void __sched notrace __schedule(bool preempt)
 				atomic_inc(&rq->nr_iowait);
 				delayacct_blkio_start();
 			}
-
-			/*
-			 * If a worker went to sleep, notify and ask workqueue
-			 * whether it wants to wake up a task to maintain
-			 * concurrency.
-			 */
-			if (prev->flags & PF_WQ_WORKER) {
-				struct task_struct *to_wakeup;
-
-				to_wakeup = wq_worker_sleeping(prev);
-				if (to_wakeup)
-					try_to_wake_up_local(to_wakeup, &rf);
-			}
 		}
 		switch_count = &prev->nvcsw;
 	}
@@ -3544,6 +3477,20 @@ static inline void sched_submit_work(struct task_struct *tsk)
 {
 	if (!tsk->state || tsk_is_pi_blocked(tsk))
 		return;
+
+	/*
+	 * If a worker went to sleep, notify and ask workqueue whether
+	 * it wants to wake up a task to maintain concurrency.
+	 * As this function is called inside the schedule() context,
+	 * we disable preemption to avoid it calling schedule() again
+	 * in the possible wakeup of a kworker.
+	 */
+	if (tsk->flags & PF_WQ_WORKER) {
+		preempt_disable();
+		wq_worker_sleeping(tsk);
+		preempt_enable_no_resched();
+	}
+
 	/*
 	 * If we are going to sleep and we have plugged IO queued,
 	 * make sure to submit it to avoid deadlocks.
@@ -3552,6 +3499,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
 	blk_schedule_flush_plug(tsk);
 }
 
+static void sched_update_worker(struct task_struct *tsk)
+{
+	if (tsk->flags & PF_WQ_WORKER)
+		wq_worker_running(tsk);
+}
+
 asmlinkage __visible void __sched schedule(void)
 {
 	struct task_struct *tsk = current;
@@ -3562,6 +3515,7 @@ asmlinkage __visible void __sched schedule(void)
 		__schedule(false);
 		sched_preempt_enable_no_resched();
 	} while (need_resched());
+	sched_update_worker(tsk);
 }
 EXPORT_SYMBOL(schedule);
 
kernel/workqueue.c

Lines changed: 24 additions & 30 deletions
@@ -841,43 +841,32 @@ static void wake_up_worker(struct worker_pool *pool)
 }
 
 /**
- * wq_worker_waking_up - a worker is waking up
+ * wq_worker_running - a worker is running again
  * @task: task waking up
- * @cpu: CPU @task is waking up to
  *
- * This function is called during try_to_wake_up() when a worker is
- * being awoken.
- *
- * CONTEXT:
- * spin_lock_irq(rq->lock)
+ * This function is called when a worker returns from schedule()
  */
-void wq_worker_waking_up(struct task_struct *task, int cpu)
+void wq_worker_running(struct task_struct *task)
 {
 	struct worker *worker = kthread_data(task);
 
-	if (!(worker->flags & WORKER_NOT_RUNNING)) {
-		WARN_ON_ONCE(worker->pool->cpu != cpu);
+	if (!worker->sleeping)
+		return;
+	if (!(worker->flags & WORKER_NOT_RUNNING))
 		atomic_inc(&worker->pool->nr_running);
-	}
+	worker->sleeping = 0;
 }
 
 /**
  * wq_worker_sleeping - a worker is going to sleep
  * @task: task going to sleep
  *
- * This function is called during schedule() when a busy worker is
- * going to sleep. Worker on the same cpu can be woken up by
- * returning pointer to its task.
- *
- * CONTEXT:
- * spin_lock_irq(rq->lock)
- *
- * Return:
- * Worker task on @cpu to wake up, %NULL if none.
+ * This function is called from schedule() when a busy worker is
+ * going to sleep.
 */
-struct task_struct *wq_worker_sleeping(struct task_struct *task)
+void wq_worker_sleeping(struct task_struct *task)
 {
-	struct worker *worker = kthread_data(task), *to_wakeup = NULL;
+	struct worker *next, *worker = kthread_data(task);
 	struct worker_pool *pool;
 
 	/*
@@ -886,13 +875,15 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
 	 * checking NOT_RUNNING.
 	 */
 	if (worker->flags & WORKER_NOT_RUNNING)
-		return NULL;
+		return;
 
 	pool = worker->pool;
 
-	/* this can only happen on the local cpu */
-	if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
-		return NULL;
+	if (WARN_ON_ONCE(worker->sleeping))
+		return;
+
+	worker->sleeping = 1;
+	spin_lock_irq(&pool->lock);
 
 	/*
 	 * The counterpart of the following dec_and_test, implied mb,
@@ -906,9 +897,12 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
 	 * lock is safe.
 	 */
 	if (atomic_dec_and_test(&pool->nr_running) &&
-	    !list_empty(&pool->worklist))
-		to_wakeup = first_idle_worker(pool);
-	return to_wakeup ? to_wakeup->task : NULL;
+	    !list_empty(&pool->worklist)) {
+		next = first_idle_worker(pool);
+		if (next)
+			wake_up_process(next->task);
+	}
+	spin_unlock_irq(&pool->lock);
 }
 
 /**
@@ -4929,7 +4923,7 @@ static void rebind_workers(struct worker_pool *pool)
		 *
		 * WRITE_ONCE() is necessary because @worker->flags may be
		 * tested without holding any lock in
-		 * wq_worker_waking_up(). Without it, NOT_RUNNING test may
+		 * wq_worker_running(). Without it, NOT_RUNNING test may
		 * fail incorrectly leading to premature concurrency
		 * management operations.
		 */

kernel/workqueue_internal.h

Lines changed: 3 additions & 2 deletions
@@ -44,6 +44,7 @@ struct worker {
 	unsigned long		last_active;	/* L: last active timestamp */
 	unsigned int		flags;		/* X: flags */
 	int			id;		/* I: worker id */
+	int			sleeping;	/* None */
 
 	/*
 	 * Opaque string set with work_set_desc(). Printed out with task
@@ -72,8 +73,8 @@ static inline struct worker *current_wq_worker(void)
  * Scheduler hooks for concurrency managed workqueue. Only to be used from
  * sched/ and workqueue.c.
  */
-void wq_worker_waking_up(struct task_struct *task, int cpu);
-struct task_struct *wq_worker_sleeping(struct task_struct *task);
+void wq_worker_running(struct task_struct *task);
+void wq_worker_sleeping(struct task_struct *task);
 work_func_t wq_worker_last_func(struct task_struct *task);
 
 #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
