
Commit b1737fe

Hou Tao authored and Kernel Patches Daemon committed
bpf: Alloc bpf_async_cb by using bpf_global_ma under PREEMPT_RT
Under PREEMPT_RT, it is not safe to use GFP_ATOMIC kmalloc when
preemption or IRQs are disabled. The following warning is reported when
running test_progs under PREEMPT_RT:

  BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
  in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 675, name: test_progs
  preempt_count: 1, expected: 0
  RCU nest depth: 0, expected: 0
  2 locks held by test_progs/675:
   #0: ffffffff864b0240 (rcu_read_lock_trace){....}-{0:0}, at: bpf_prog_test_run_syscall+0x2c0/0x830
   #1: ffff8881f4ec40c8 ((&c->lock)){....}-{2:2}, at: ___slab_alloc+0xbc/0x1280
  Preemption disabled at:
  [<ffffffff8175ae2b>] __bpf_async_init+0xbb/0xb10
  CPU: 1 UID: 0 PID: 675 Comm: test_progs Tainted: G O 6.12.0+ #11
  Tainted: [O]=OOT_MODULE
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
  ...
  Call Trace:
   <TASK>
   dump_stack_lvl+0x57/0x70
   dump_stack+0x10/0x20
   __might_resched+0x337/0x4d0
   rt_spin_lock+0xd4/0x230
   ___slab_alloc+0xbc/0x1280
   __slab_alloc.isra.0+0x5d/0xa0
   __kmalloc_node_noprof+0xf7/0x4f0
   bpf_map_kmalloc_node+0xf5/0x6b0
   __bpf_async_init+0x20e/0xb10
   bpf_timer_init+0x30/0x40
   bpf_prog_c7e2dc9ff3d5ba62_start_cb+0x55/0x85
   bpf_prog_4eb421be69ae82fa_start_timer+0x5d/0x7e
   bpf_prog_test_run_syscall+0x322/0x830
   __sys_bpf+0x135d/0x3ca0
   __x64_sys_bpf+0x75/0xb0
   x64_sys_call+0x1b5/0xa10
   do_syscall_64+0x3b/0xc0
   entry_SYSCALL_64_after_hwframe+0x4b/0x53

Fix the problem by using bpf_global_ma to allocate bpf_async_cb when
PREEMPT_RT is enabled. The reason for still using kmalloc in the
non-PREEMPT_RT case is that bpf_global_ma doesn't support accounting
the allocated memory to a specific memcg. Also do the memory allocation
before invoking __bpf_spin_lock_irqsave() to reduce the possibility of
-ENOMEM for bpf_global_ma.

Signed-off-by: Hou Tao <[email protected]>
1 parent b095289 commit b1737fe
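
The heart of the fix is a common pattern: allocate speculatively before
entering the spinlock-protected critical section, and drop the allocation
on the contended (-EBUSY) path. Below is a minimal userspace sketch of
that pattern, not the kernel code itself: malloc/free and a pthread mutex
stand in for the BPF allocators and __bpf_spin_lock_irqsave(), and all
names (async_init, slot, slot_lock) are hypothetical.

/* Userspace sketch of the allocate-before-lock pattern (assumptions noted above). */
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct async_cb { int payload; };

static struct async_cb *slot;          /* plays the role of async->timer */
static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;

static int async_init(void)
{
	/* Allocate before taking the lock, so no allocator (which may
	 * sleep under PREEMPT_RT) runs inside the critical section. */
	struct async_cb *cb = malloc(sizeof(*cb));

	if (!cb)
		return -1;             /* maps to -ENOMEM */

	pthread_mutex_lock(&slot_lock);
	if (slot) {
		/* Slot already initialized: drop the speculative allocation. */
		pthread_mutex_unlock(&slot_lock);
		free(cb);
		return -2;             /* maps to -EBUSY */
	}
	slot = cb;
	pthread_mutex_unlock(&slot_lock);
	return 0;
}

int main(void)
{
	printf("first init: %d\n", async_init());   /* expect 0 */
	printf("second init: %d\n", async_init());  /* expect -2 */
	return 0;
}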


kernel/bpf/helpers.c

Lines changed: 38 additions & 10 deletions
@@ -1109,12 +1109,14 @@ struct bpf_async_cb {
  * freeing the timers when inner map is replaced or deleted by user space.
  */
 struct bpf_hrtimer {
+	/* cb must be the first member */
 	struct bpf_async_cb cb;
 	struct hrtimer timer;
 	atomic_t cancelling;
 };
 
 struct bpf_work {
+	/* cb must be the first member */
 	struct bpf_async_cb cb;
 	struct work_struct work;
 	struct work_struct delete_work;
@@ -1141,6 +1143,34 @@ enum bpf_async_type {
 
 static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
 
+static void bpf_async_free(struct bpf_async_cb *cb)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		bpf_mem_free(&bpf_global_ma, cb);
+	else
+		kfree(cb);
+}
+
+static void bpf_async_free_rcu(struct bpf_async_cb *cb)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		bpf_mem_free_rcu(&bpf_global_ma, cb);
+	else
+		kfree_rcu(cb, rcu);
+}
+
+static struct bpf_async_cb *bpf_async_alloc(struct bpf_map *map, size_t size)
+{
+	struct bpf_async_cb *cb;
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		cb = bpf_mem_alloc(&bpf_global_ma, size);
+	else
+		/* allocate hrtimer via map_kmalloc to use memcg accounting */
+		cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node);
+	return cb;
+}
+
 static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
 {
 	struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
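
A short aside on the IS_ENABLED() idiom in the helpers above: it expands
to a compile-time constant, so both allocator branches must type-check
but the untaken one is eliminated by the optimizer, which is why the code
avoids #ifdef blocks. A minimal userspace stand-in follows; the simplified
IS_ENABLED macro and CONFIG_PREEMPT_RT value below are assumptions for
illustration, not the kernel's actual definitions.

#include <stdio.h>

/* Simplified stand-in: the real kernel macro detects whether a config
 * symbol is defined to 1; here we just pass the value through. */
#define IS_ENABLED(option) (option)
#define CONFIG_PREEMPT_RT 0   /* assumption: a non-RT build */

static const char *alloc_backend(void)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		return "bpf_global_ma";          /* dead code when RT is 0 */
	else
		return "bpf_map_kmalloc_node";   /* kept: memcg-accounted */
}

int main(void)
{
	printf("allocator backend: %s\n", alloc_backend());
	return 0;
}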
@@ -1221,7 +1251,7 @@ static void bpf_wq_delete_work(struct work_struct *work)
 
 	cancel_work_sync(&w->work);
 
-	kfree_rcu(w, cb.rcu);
+	bpf_async_free_rcu(&w->cb);
 }
 
 static void bpf_timer_delete_work(struct work_struct *work)
@@ -1236,7 +1266,7 @@ static void bpf_timer_delete_work(struct work_struct *work)
 	 * bpf_timer_cancel_and_free will have been cancelled.
 	 */
 	hrtimer_cancel(&t->timer);
-	kfree_rcu(t, cb.rcu);
+	bpf_async_free_rcu(&t->cb);
 }
 
 static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
@@ -1263,20 +1293,18 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
 		return -EINVAL;
 	}
 
+	cb = bpf_async_alloc(map, size);
+	if (!cb)
+		return -ENOMEM;
+
 	__bpf_spin_lock_irqsave(&async->lock);
 	t = async->timer;
 	if (t) {
+		bpf_async_free(cb);
 		ret = -EBUSY;
 		goto out;
 	}
 
-	/* allocate hrtimer via map_kmalloc to use memcg accounting */
-	cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node);
-	if (!cb) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	switch (type) {
 	case BPF_ASYNC_TYPE_TIMER:
 		clockid = flags & (MAX_CLOCKS - 1);
@@ -1313,7 +1341,7 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
 	 * or pinned in bpffs.
 	 */
 	WRITE_ONCE(async->cb, NULL);
-	kfree(cb);
+	bpf_async_free(cb);
 	ret = -EPERM;
 	}
 out:
