Skip to content

Commit e5ced8e

Browse files
committed
cgroup_freezer: replace freezer->lock with freezer_mutex
After 96d365e ("cgroup: make css_set_lock a rwsem and rename it to css_set_rwsem"), css task iterators requires sleepable context as it may block on css_set_rwsem. I missed that cgroup_freezer was iterating tasks under IRQ-safe spinlock freezer->lock. This leads to errors like the following on freezer state reads and transitions. BUG: sleeping function called from invalid context at /work /os/work/kernel/locking/rwsem.c:20 in_atomic(): 0, irqs_disabled(): 0, pid: 462, name: bash 5 locks held by bash/462: #0: (sb_writers#7){.+.+.+}, at: [<ffffffff811f0843>] vfs_write+0x1a3/0x1c0 #1: (&of->mutex){+.+.+.}, at: [<ffffffff8126d78b>] kernfs_fop_write+0xbb/0x170 #2: (s_active#70){.+.+.+}, at: [<ffffffff8126d793>] kernfs_fop_write+0xc3/0x170 #3: (freezer_mutex){+.+...}, at: [<ffffffff81135981>] freezer_write+0x61/0x1e0 #4: (rcu_read_lock){......}, at: [<ffffffff81135973>] freezer_write+0x53/0x1e0 Preemption disabled at:[<ffffffff81104404>] console_unlock+0x1e4/0x460 CPU: 3 PID: 462 Comm: bash Not tainted 3.15.0-rc1-work+ #10 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 ffff88000916a6d0 ffff88000e0a3da0 ffffffff81cf8c96 0000000000000000 ffff88000e0a3dc8 ffffffff810cf4f2 ffffffff82388040 ffff880013aaf740 0000000000000002 ffff88000e0a3de8 ffffffff81d05974 0000000000000246 Call Trace: [<ffffffff81cf8c96>] dump_stack+0x4e/0x7a [<ffffffff810cf4f2>] __might_sleep+0x162/0x260 [<ffffffff81d05974>] down_read+0x24/0x60 [<ffffffff81133e87>] css_task_iter_start+0x27/0x70 [<ffffffff8113584d>] freezer_apply_state+0x5d/0x130 [<ffffffff81135a16>] freezer_write+0xf6/0x1e0 [<ffffffff8112eb88>] cgroup_file_write+0xd8/0x230 [<ffffffff8126d7b7>] kernfs_fop_write+0xe7/0x170 [<ffffffff811f0756>] vfs_write+0xb6/0x1c0 [<ffffffff811f121d>] SyS_write+0x4d/0xc0 [<ffffffff81d08292>] system_call_fastpath+0x16/0x1b freezer->lock used to be used in hot paths but that time is long gone and there's no reason for the lock to be IRQ-safe spinlock or even per-cgroup. In fact, given the fact that a cgroup may contain large number of tasks, it's not a good idea to iterate over them while holding IRQ-safe spinlock. Let's simplify locking by replacing per-cgroup freezer->lock with global freezer_mutex. This also makes the comments explaining the intricacies of policy inheritance and the locking around it as the states are protected by a common mutex. The conversion is mostly straight-forward. The followings are worth mentioning. * freezer_css_online() no longer needs double locking. * freezer_attach() now performs propagation simply while holding freezer_mutex. update_if_frozen() race no longer exists and the comment is removed. * freezer_fork() now tests whether the task is in root cgroup using the new task_css_is_root() without doing rcu_read_lock/unlock(). If not, it grabs freezer_mutex and performs the operation. * freezer_read() and freezer_change_state() grab freezer_mutex across the whole operation and pin the css while iterating so that each descendant processing happens in sleepable context. Fixes: 96d365e ("cgroup: make css_set_lock a rwsem and rename it to css_set_rwsem") Signed-off-by: Tejun Heo <[email protected]> Acked-by: Li Zefan <[email protected]>
1 parent 5024ae2 commit e5ced8e

File tree

1 file changed

+46
-66
lines changed

1 file changed

+46
-66
lines changed

kernel/cgroup_freezer.c

+46-66
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <linux/uaccess.h>
2222
#include <linux/freezer.h>
2323
#include <linux/seq_file.h>
24+
#include <linux/mutex.h>
2425

2526
/*
2627
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
@@ -42,9 +43,10 @@ enum freezer_state_flags {
4243
struct freezer {
4344
struct cgroup_subsys_state css;
4445
unsigned int state;
45-
spinlock_t lock;
4646
};
4747

48+
static DEFINE_MUTEX(freezer_mutex);
49+
4850
static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
4951
{
5052
return css ? container_of(css, struct freezer, css) : NULL;
@@ -93,7 +95,6 @@ freezer_css_alloc(struct cgroup_subsys_state *parent_css)
9395
if (!freezer)
9496
return ERR_PTR(-ENOMEM);
9597

96-
spin_lock_init(&freezer->lock);
9798
return &freezer->css;
9899
}
99100

@@ -110,14 +111,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
110111
struct freezer *freezer = css_freezer(css);
111112
struct freezer *parent = parent_freezer(freezer);
112113

113-
/*
114-
* The following double locking and freezing state inheritance
115-
* guarantee that @cgroup can never escape ancestors' freezing
116-
* states. See css_for_each_descendant_pre() for details.
117-
*/
118-
if (parent)
119-
spin_lock_irq(&parent->lock);
120-
spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING);
114+
mutex_lock(&freezer_mutex);
121115

122116
freezer->state |= CGROUP_FREEZER_ONLINE;
123117

@@ -126,10 +120,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
126120
atomic_inc(&system_freezing_cnt);
127121
}
128122

129-
spin_unlock(&freezer->lock);
130-
if (parent)
131-
spin_unlock_irq(&parent->lock);
132-
123+
mutex_unlock(&freezer_mutex);
133124
return 0;
134125
}
135126

@@ -144,14 +135,14 @@ static void freezer_css_offline(struct cgroup_subsys_state *css)
144135
{
145136
struct freezer *freezer = css_freezer(css);
146137

147-
spin_lock_irq(&freezer->lock);
138+
mutex_lock(&freezer_mutex);
148139

149140
if (freezer->state & CGROUP_FREEZING)
150141
atomic_dec(&system_freezing_cnt);
151142

152143
freezer->state = 0;
153144

154-
spin_unlock_irq(&freezer->lock);
145+
mutex_unlock(&freezer_mutex);
155146
}
156147

157148
static void freezer_css_free(struct cgroup_subsys_state *css)
@@ -175,7 +166,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
175166
struct task_struct *task;
176167
bool clear_frozen = false;
177168

178-
spin_lock_irq(&freezer->lock);
169+
mutex_lock(&freezer_mutex);
179170

180171
/*
181172
* Make the new tasks conform to the current state of @new_css.
@@ -197,21 +188,13 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
197188
}
198189
}
199190

200-
spin_unlock_irq(&freezer->lock);
201-
202-
/*
203-
* Propagate FROZEN clearing upwards. We may race with
204-
* update_if_frozen(), but as long as both work bottom-up, either
205-
* update_if_frozen() sees child's FROZEN cleared or we clear the
206-
* parent's FROZEN later. No parent w/ !FROZEN children can be
207-
* left FROZEN.
208-
*/
191+
/* propagate FROZEN clearing upwards */
209192
while (clear_frozen && (freezer = parent_freezer(freezer))) {
210-
spin_lock_irq(&freezer->lock);
211193
freezer->state &= ~CGROUP_FROZEN;
212194
clear_frozen = freezer->state & CGROUP_FREEZING;
213-
spin_unlock_irq(&freezer->lock);
214195
}
196+
197+
mutex_unlock(&freezer_mutex);
215198
}
216199

217200
/**
@@ -228,34 +211,25 @@ static void freezer_fork(struct task_struct *task)
228211
{
229212
struct freezer *freezer;
230213

231-
rcu_read_lock();
232-
freezer = task_freezer(task);
233-
234214
/*
235215
* The root cgroup is non-freezable, so we can skip locking the
236216
* freezer. This is safe regardless of race with task migration.
237217
* If we didn't race or won, skipping is obviously the right thing
238218
* to do. If we lost and root is the new cgroup, noop is still the
239219
* right thing to do.
240220
*/
241-
if (!parent_freezer(freezer))
242-
goto out;
221+
if (task_css_is_root(task, freezer_cgrp_id))
222+
return;
243223

244-
/*
245-
* Grab @freezer->lock and freeze @task after verifying @task still
246-
* belongs to @freezer and it's freezing. The former is for the
247-
* case where we have raced against task migration and lost and
248-
* @task is already in a different cgroup which may not be frozen.
249-
* This isn't strictly necessary as freeze_task() is allowed to be
250-
* called spuriously but let's do it anyway for, if nothing else,
251-
* documentation.
252-
*/
253-
spin_lock_irq(&freezer->lock);
254-
if (freezer == task_freezer(task) && (freezer->state & CGROUP_FREEZING))
224+
mutex_lock(&freezer_mutex);
225+
rcu_read_lock();
226+
227+
freezer = task_freezer(task);
228+
if (freezer->state & CGROUP_FREEZING)
255229
freeze_task(task);
256-
spin_unlock_irq(&freezer->lock);
257-
out:
230+
258231
rcu_read_unlock();
232+
mutex_unlock(&freezer_mutex);
259233
}
260234

261235
/**
@@ -281,22 +255,22 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
281255
struct css_task_iter it;
282256
struct task_struct *task;
283257

284-
WARN_ON_ONCE(!rcu_read_lock_held());
285-
286-
spin_lock_irq(&freezer->lock);
258+
lockdep_assert_held(&freezer_mutex);
287259

288260
if (!(freezer->state & CGROUP_FREEZING) ||
289261
(freezer->state & CGROUP_FROZEN))
290-
goto out_unlock;
262+
return;
291263

292264
/* are all (live) children frozen? */
265+
rcu_read_lock();
293266
css_for_each_child(pos, css) {
294267
struct freezer *child = css_freezer(pos);
295268

296269
if ((child->state & CGROUP_FREEZER_ONLINE) &&
297270
!(child->state & CGROUP_FROZEN))
298-
goto out_unlock;
271+
return;
299272
}
273+
rcu_read_unlock();
300274

301275
/* are all tasks frozen? */
302276
css_task_iter_start(css, &it);
@@ -317,21 +291,29 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
317291
freezer->state |= CGROUP_FROZEN;
318292
out_iter_end:
319293
css_task_iter_end(&it);
320-
out_unlock:
321-
spin_unlock_irq(&freezer->lock);
322294
}
323295

324296
static int freezer_read(struct seq_file *m, void *v)
325297
{
326298
struct cgroup_subsys_state *css = seq_css(m), *pos;
327299

300+
mutex_lock(&freezer_mutex);
328301
rcu_read_lock();
329302

330303
/* update states bottom-up */
331-
css_for_each_descendant_post(pos, css)
304+
css_for_each_descendant_post(pos, css) {
305+
if (!css_tryget(pos))
306+
continue;
307+
rcu_read_unlock();
308+
332309
update_if_frozen(pos);
333310

311+
rcu_read_lock();
312+
css_put(pos);
313+
}
314+
334315
rcu_read_unlock();
316+
mutex_unlock(&freezer_mutex);
335317

336318
seq_puts(m, freezer_state_strs(css_freezer(css)->state));
337319
seq_putc(m, '\n');
@@ -373,7 +355,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
373355
unsigned int state)
374356
{
375357
/* also synchronizes against task migration, see freezer_attach() */
376-
lockdep_assert_held(&freezer->lock);
358+
lockdep_assert_held(&freezer_mutex);
377359

378360
if (!(freezer->state & CGROUP_FREEZER_ONLINE))
379361
return;
@@ -414,31 +396,29 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
414396
* descendant will try to inherit its parent's FREEZING state as
415397
* CGROUP_FREEZING_PARENT.
416398
*/
399+
mutex_lock(&freezer_mutex);
417400
rcu_read_lock();
418401
css_for_each_descendant_pre(pos, &freezer->css) {
419402
struct freezer *pos_f = css_freezer(pos);
420403
struct freezer *parent = parent_freezer(pos_f);
421404

422-
spin_lock_irq(&pos_f->lock);
405+
if (!css_tryget(pos))
406+
continue;
407+
rcu_read_unlock();
423408

424-
if (pos_f == freezer) {
409+
if (pos_f == freezer)
425410
freezer_apply_state(pos_f, freeze,
426411
CGROUP_FREEZING_SELF);
427-
} else {
428-
/*
429-
* Our update to @parent->state is already visible
430-
* which is all we need. No need to lock @parent.
431-
* For more info on synchronization, see
432-
* freezer_post_create().
433-
*/
412+
else
434413
freezer_apply_state(pos_f,
435414
parent->state & CGROUP_FREEZING,
436415
CGROUP_FREEZING_PARENT);
437-
}
438416

439-
spin_unlock_irq(&pos_f->lock);
417+
rcu_read_lock();
418+
css_put(pos);
440419
}
441420
rcu_read_unlock();
421+
mutex_unlock(&freezer_mutex);
442422
}
443423

444424
static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft,

0 commit comments

Comments
 (0)