
Commit 1705c62

Merge branch 'Sleepable local storage'

Author: Alexei Starovoitov (committed)

KP Singh says:

====================

Local storage is currently unusable in sleepable helpers. One of the
important use cases of local_storage is to attach security (or
performance) contextual information to kernel objects in LSM / tracing
programs, to be used later in the life-cycle of the object.

Sometimes this context can only be gathered from sleepable programs
(because it needs accessing __user pointers or helpers like
bpf_ima_inode_hash). Allowing local storage to be used from sleepable
programs lets such context be managed with the benefits of
local_storage.

# v2 -> v3

* Fixed some RCU issues pointed out by Martin
* Added Martin's ack

# v1 -> v2

* Generalized the RCU checks (a separate patch will update the
  non-local-storage code where this can be used).
* Added the RCU lock checks that were missing from v1

====================

Signed-off-by: Alexei Starovoitov <[email protected]>

2 parents 3ccdcee + 0ae6eff  commit 1705c62
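
To make the use case above concrete, here is a minimal sketch (not part of this
commit; the map name, value type, and program name are hypothetical) of a
sleepable LSM program that caches an IMA hash in inode local storage — exactly
the kind of program this series enables:

/* Hypothetical sketch: "lsm.s/" marks the program sleepable, which is
 * what allows calling bpf_ima_inode_hash() and, with this series, the
 * local storage helpers from the same program.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct inode_hash {
        u8 digest[32];
};

struct {
        __uint(type, BPF_MAP_TYPE_INODE_STORAGE);
        __uint(map_flags, BPF_F_NO_PREALLOC);
        __type(key, int);
        __type(value, struct inode_hash);
} inode_hash_map SEC(".maps");

SEC("lsm.s/file_open")
int BPF_PROG(measure_open, struct file *file)
{
        struct inode_hash *h;

        h = bpf_inode_storage_get(&inode_hash_map, file->f_inode, 0,
                                  BPF_LOCAL_STORAGE_GET_F_CREATE);
        if (!h)
                return 0;

        /* May sleep: only callable because the program is sleepable. */
        bpf_ima_inode_hash(file->f_inode, h->digest, sizeof(h->digest));
        return 0;
}

char LICENSE[] SEC("license") = "GPL";

The bpf_inode_storage_get() call here runs under the tasks-trace RCU read lock
taken by the trampoline, which is what the rest of this commit teaches the
storage code to recognize.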

File tree: 8 files changed, +73 -49 lines changed

include/linux/bpf_local_storage.h

Lines changed: 5 additions & 0 deletions

@@ -17,6 +17,9 @@
 
 #define BPF_LOCAL_STORAGE_CACHE_SIZE   16
 
+#define bpf_rcu_lock_held()                                            \
+        (rcu_read_lock_held() || rcu_read_lock_trace_held() ||         \
+         rcu_read_lock_bh_held())
 struct bpf_local_storage_map_bucket {
         struct hlist_head list;
         raw_spinlock_t lock;
@@ -162,4 +165,6 @@ struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                          void *value, u64 map_flags);
 
+void bpf_local_storage_free_rcu(struct rcu_head *rcu);
+
 #endif /* _BPF_LOCAL_STORAGE_H */
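
Why these three conditions: non-sleepable programs run under rcu_read_lock()
(or the BH flavour), while the BPF trampoline brackets sleepable program runs
with the tasks-trace read lock, roughly as in the sketch below (simplified and
assumed from the trampoline enter/exit paths, which also handle migration and
recursion protection; not part of this diff):

/* Simplified sketch: this bracketing is what makes
 * rcu_read_lock_trace_held(), and therefore bpf_rcu_lock_held(),
 * true inside the storage helpers called from a sleepable program.
 */
#include <linux/rcupdate_trace.h>

static void run_sleepable_prog(void (*prog)(void))
{
        rcu_read_lock_trace();   /* tasks-trace RCU: readers may sleep */
        prog();                  /* storage helpers are called in here */
        rcu_read_unlock_trace();
}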

kernel/bpf/bpf_inode_storage.c

Lines changed: 5 additions & 1 deletion

@@ -17,6 +17,7 @@
 #include <linux/bpf_lsm.h>
 #include <linux/btf_ids.h>
 #include <linux/fdtable.h>
+#include <linux/rcupdate_trace.h>
 
 DEFINE_BPF_STORAGE_CACHE(inode_cache);
 
@@ -44,7 +45,8 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
         if (!bsb)
                 return NULL;
 
-        inode_storage = rcu_dereference(bsb->storage);
+        inode_storage =
+                rcu_dereference_check(bsb->storage, bpf_rcu_lock_held());
         if (!inode_storage)
                 return NULL;
 
@@ -172,6 +174,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 {
         struct bpf_local_storage_data *sdata;
 
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
                 return (unsigned long)NULL;
 
@@ -204,6 +207,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 BPF_CALL_2(bpf_inode_storage_delete,
            struct bpf_map *, map, struct inode *, inode)
 {
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (!inode)
                 return -EINVAL;
 

kernel/bpf/bpf_local_storage.c

Lines changed: 37 additions & 13 deletions

@@ -11,6 +11,9 @@
 #include <net/sock.h>
 #include <uapi/linux/sock_diag.h>
 #include <uapi/linux/btf.h>
+#include <linux/rcupdate.h>
+#include <linux/rcupdate_trace.h>
+#include <linux/rcupdate_wait.h>
 
 #define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)
 
@@ -81,6 +84,22 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
         return NULL;
 }
 
+void bpf_local_storage_free_rcu(struct rcu_head *rcu)
+{
+        struct bpf_local_storage *local_storage;
+
+        local_storage = container_of(rcu, struct bpf_local_storage, rcu);
+        kfree_rcu(local_storage, rcu);
+}
+
+static void bpf_selem_free_rcu(struct rcu_head *rcu)
+{
+        struct bpf_local_storage_elem *selem;
+
+        selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+        kfree_rcu(selem, rcu);
+}
+
 /* local_storage->lock must be held and selem->local_storage == local_storage.
  * The caller must ensure selem->smap is still valid to be
  * dereferenced for its smap->elem_size and smap->cache_idx.
@@ -93,7 +112,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
         bool free_local_storage;
         void *owner;
 
-        smap = rcu_dereference(SDATA(selem)->smap);
+        smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
         owner = local_storage->owner;
 
         /* All uncharging on the owner must be done first.
@@ -118,21 +137,20 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
                  *
                  * Although the unlock will be done under
                  * rcu_read_lock(), it is more intutivie to
-                 * read if kfree_rcu(local_storage, rcu) is done
+                 * read if the freeing of the storage is done
                  * after the raw_spin_unlock_bh(&local_storage->lock).
                  *
                  * Hence, a "bool free_local_storage" is returned
-                 * to the caller which then calls the kfree_rcu()
-                 * after unlock.
+                 * to the caller which then calls then frees the storage after
+                 * all the RCU grace periods have expired.
                  */
         }
         hlist_del_init_rcu(&selem->snode);
         if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
             SDATA(selem))
                 RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
 
-        kfree_rcu(selem, rcu);
-
+        call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
         return free_local_storage;
 }
 
@@ -146,15 +164,17 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
                 /* selem has already been unlinked from sk */
                 return;
 
-        local_storage = rcu_dereference(selem->local_storage);
+        local_storage = rcu_dereference_check(selem->local_storage,
+                                              bpf_rcu_lock_held());
         raw_spin_lock_irqsave(&local_storage->lock, flags);
         if (likely(selem_linked_to_storage(selem)))
                 free_local_storage = bpf_selem_unlink_storage_nolock(
                         local_storage, selem, true);
         raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 
         if (free_local_storage)
-                kfree_rcu(local_storage, rcu);
+                call_rcu_tasks_trace(&local_storage->rcu,
+                                     bpf_local_storage_free_rcu);
 }
 
 void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
@@ -174,7 +194,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
                 /* selem has already be unlinked from smap */
                 return;
 
-        smap = rcu_dereference(SDATA(selem)->smap);
+        smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
         b = select_bucket(smap, selem);
         raw_spin_lock_irqsave(&b->lock, flags);
         if (likely(selem_linked_to_map(selem)))
@@ -213,12 +233,14 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
         struct bpf_local_storage_elem *selem;
 
         /* Fast path (cache hit) */
-        sdata = rcu_dereference(local_storage->cache[smap->cache_idx]);
+        sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
+                                      bpf_rcu_lock_held());
         if (sdata && rcu_access_pointer(sdata->smap) == smap)
                 return sdata;
 
         /* Slow path (cache miss) */
-        hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
+        hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
+                                  rcu_read_lock_trace_held())
                 if (rcu_access_pointer(SDATA(selem)->smap) == smap)
                         break;
 
@@ -306,7 +328,8 @@ int bpf_local_storage_alloc(void *owner,
                  * bucket->list, first_selem can be freed immediately
                  * (instead of kfree_rcu) because
                  * bpf_local_storage_map_free() does a
-                 * synchronize_rcu() before walking the bucket->list.
+                 * synchronize_rcu_mult (waiting for both sleepable and
+                 * normal programs) before walking the bucket->list.
                  * Hence, no one is accessing selem from the
                  * bucket->list under rcu_read_lock().
                  */
@@ -342,7 +365,8 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
             !map_value_has_spin_lock(&smap->map)))
                 return ERR_PTR(-EINVAL);
 
-        local_storage = rcu_dereference(*owner_storage(smap, owner));
+        local_storage = rcu_dereference_check(*owner_storage(smap, owner),
+                                              bpf_rcu_lock_held());
         if (!local_storage || hlist_empty(&local_storage->list)) {
                 /* Very first elem for the owner */
                 err = check_flags(NULL, map_flags);
kernel/bpf/bpf_task_storage.c

Lines changed: 5 additions & 1 deletion

@@ -17,6 +17,7 @@
 #include <uapi/linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/fdtable.h>
+#include <linux/rcupdate_trace.h>
 
 DEFINE_BPF_STORAGE_CACHE(task_cache);
 
@@ -59,7 +60,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
         struct bpf_local_storage *task_storage;
         struct bpf_local_storage_map *smap;
 
-        task_storage = rcu_dereference(task->bpf_storage);
+        task_storage =
+                rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held());
         if (!task_storage)
                 return NULL;
 
@@ -229,6 +231,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 {
         struct bpf_local_storage_data *sdata;
 
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
                 return (unsigned long)NULL;
 
@@ -260,6 +263,7 @@ BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
 {
         int ret;
 
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (!task)
                 return -EINVAL;
 
kernel/bpf/verifier.c

Lines changed: 3 additions & 0 deletions

@@ -11874,6 +11874,9 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
                 }
                 break;
         case BPF_MAP_TYPE_RINGBUF:
+        case BPF_MAP_TYPE_INODE_STORAGE:
+        case BPF_MAP_TYPE_SK_STORAGE:
+        case BPF_MAP_TYPE_TASK_STORAGE:
                 break;
         default:
                 verbose(env,
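
check_map_prog_compatibility() restricts sleepable programs to a whitelist of
map types; this hunk adds the three local-storage types next to
BPF_MAP_TYPE_RINGBUF (the hash/array cases handled just above in the switch
end at the "}" / "break;" context lines). From userspace nothing changes
except that loading now succeeds; a sketch with libbpf (the object file name
is made up):

#include <bpf/libbpf.h>
#include <errno.h>

/* Before this commit the verifier rejected a sleepable program that
 * referenced an inode/sk/task storage map; now bpf_object__load()
 * succeeds. "ima_hash.bpf.o" is a hypothetical object name.
 */
int load_sleepable_prog(void)
{
        struct bpf_object *obj;
        int err;

        obj = bpf_object__open_file("ima_hash.bpf.o", NULL);
        if (!obj)
                return -errno;

        err = bpf_object__load(obj);
        bpf_object__close(obj); /* attach omitted in this sketch */
        return err;
}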

net/core/bpf_sk_storage.c

Lines changed: 7 additions & 1 deletion

@@ -13,6 +13,7 @@
 #include <net/sock.h>
 #include <uapi/linux/sock_diag.h>
 #include <uapi/linux/btf.h>
+#include <linux/rcupdate_trace.h>
 
 DEFINE_BPF_STORAGE_CACHE(sk_cache);
 
@@ -22,7 +23,8 @@ bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
         struct bpf_local_storage *sk_storage;
         struct bpf_local_storage_map *smap;
 
-        sk_storage = rcu_dereference(sk->sk_bpf_storage);
+        sk_storage =
+                rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
         if (!sk_storage)
                 return NULL;
 
@@ -258,6 +260,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 {
         struct bpf_local_storage_data *sdata;
 
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
                 return (unsigned long)NULL;
 
@@ -288,6 +291,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 
 BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
 {
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (!sk || !sk_fullsock(sk))
                 return -EINVAL;
 
@@ -416,6 +420,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
 BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
            void *, value, u64, flags)
 {
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (in_hardirq() || in_nmi())
                 return (unsigned long)NULL;
 
@@ -425,6 +430,7 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
 BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
            struct sock *, sk)
 {
+        WARN_ON_ONCE(!bpf_rcu_lock_held());
         if (in_hardirq() || in_nmi())
                 return -EPERM;
 

tools/testing/selftests/bpf/prog_tests/test_local_storage.c

Lines changed: 6 additions & 14 deletions

@@ -28,10 +28,6 @@ static unsigned int duration;
 struct storage {
         void *inode;
         unsigned int value;
-        /* Lock ensures that spin locked versions of local stoage operations
-         * also work, most operations in this tests are still single threaded
-         */
-        struct bpf_spin_lock lock;
 };
 
 /* Fork and exec the provided rm binary and return the exit code of the
@@ -66,27 +62,24 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path)
 
 static bool check_syscall_operations(int map_fd, int obj_fd)
 {
-        struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } },
-                       lookup_val = { .value = 0, .lock = { 0 } };
+        struct storage val = { .value = TEST_STORAGE_VALUE },
+                       lookup_val = { .value = 0 };
         int err;
 
         /* Looking up an existing element should fail initially */
-        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
-                                        BPF_F_LOCK);
+        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
         if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
                   "err:%d errno:%d\n", err, errno))
                 return false;
 
         /* Create a new element */
-        err = bpf_map_update_elem(map_fd, &obj_fd, &val,
-                                  BPF_NOEXIST | BPF_F_LOCK);
+        err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST);
         if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err,
                   errno))
                 return false;
 
         /* Lookup the newly created element */
-        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
-                                        BPF_F_LOCK);
+        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
         if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err,
                   errno))
                 return false;
@@ -102,8 +95,7 @@ static bool check_syscall_operations(int map_fd, int obj_fd)
                 return false;
 
         /* The lookup should fail, now that the element has been deleted */
-        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
-                                        BPF_F_LOCK);
+        err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
         if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
                   "err:%d errno:%d\n", err, errno))
                 return false;
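
The bpf_spin_lock field and the BPF_F_LOCK flags are dropped here because,
elsewhere in this series, the test's BPF programs move to the sleepable
("lsm.s/") variants, and bpf_spin_lock is not available to sleepable programs.
Note also that on the syscall side the key for these storage maps is an fd
referring to the owning object (obj_fd above). A minimal sketch of the
sleepable flavour of such a program (hypothetical body; the real selftest does
more):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct {
        __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
        __uint(map_flags, BPF_F_NO_PREALLOC);
        __type(key, int);
        __type(value, __u32);
} task_map SEC(".maps");

SEC("lsm.s/inode_unlink")       /* ".s" = sleepable */
int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
{
        __u32 *value;

        value = bpf_task_storage_get(&task_map, bpf_get_current_task_btf(), 0,
                                     BPF_LOCAL_STORAGE_GET_F_CREATE);
        if (value)
                *value += 1;    /* plain update; no bpf_spin_lock here */
        return 0;
}

char LICENSE[] SEC("license") = "GPL";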
