Skip to content

Commit 90d4675

Browse files
joannekoongNobody
authored and
Nobody
committed
bpf: Dynptr support for ring buffers
Currently, our only way of writing dynamically-sized data into a ring buffer is through bpf_ringbuf_output, but this incurs an extra memcpy cost. bpf_ringbuf_reserve + bpf_ringbuf_commit avoids this extra memcpy, but it can only safely support reservation sizes that are statically known, since the verifier cannot guarantee that the bpf program won’t access memory outside the reserved space.

The bpf_dynptr abstraction allows for dynamically-sized ring buffer reservations without the extra memcpy.

There are 3 new APIs:

long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr);
void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags);
void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags);

These closely follow the functionality of the original ringbuf APIs. For example, all ringbuf dynptrs that have been reserved must be either submitted or discarded before the program exits.

Signed-off-by: Joanne Koong <[email protected]>
Reported-by: kernel test robot <[email protected]>
1 parent 6c3319d commit 90d4675

File tree

6 files changed

+160
-4
lines changed

6 files changed

+160
-4
lines changed

include/linux/bpf.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,10 @@ enum bpf_type_flag {
352352
/* DYNPTR points to dynamically allocated memory. */
353353
DYNPTR_TYPE_MALLOC = BIT(8 + BPF_BASE_TYPE_BITS),
354354

355-
__BPF_TYPE_LAST_FLAG = DYNPTR_TYPE_MALLOC,
355+
/* DYNPTR points to a ringbuf record. */
356+
DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS),
357+
358+
__BPF_TYPE_LAST_FLAG = DYNPTR_TYPE_RINGBUF,
356359
};
357360

358361
/* Max number of base types. */
@@ -2255,6 +2258,9 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto;
22552258
extern const struct bpf_func_proto bpf_ringbuf_submit_proto;
22562259
extern const struct bpf_func_proto bpf_ringbuf_discard_proto;
22572260
extern const struct bpf_func_proto bpf_ringbuf_query_proto;
2261+
extern const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto;
2262+
extern const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto;
2263+
extern const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto;
22582264
extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto;
22592265
extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
22602266
extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
@@ -2418,6 +2424,8 @@ enum bpf_dynptr_type {
24182424
BPF_DYNPTR_TYPE_LOCAL,
24192425
/* Memory allocated dynamically by the kernel for the dynptr */
24202426
BPF_DYNPTR_TYPE_MALLOC,
2427+
/* Underlying data is a ringbuf record */
2428+
BPF_DYNPTR_TYPE_RINGBUF,
24212429
};
24222430

24232431
/* The upper 4 bits of dynptr->size are reserved. Consequently, the

include/uapi/linux/bpf.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5202,6 +5202,33 @@ union bpf_attr {
52025202
* Pointer to the underlying dynptr data, NULL if the ptr is
52035203
* read-only, if the dynptr is invalid, or if the offset and length
52045204
* is out of bounds.
5205+
*
5206+
* long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr)
5207+
* Description
5208+
* Reserve *size* bytes of payload in a ring buffer *ringbuf*
5209+
* through the dynptr interface. *flags* must be 0.
5210+
* Return
5211+
* 0 on success, or a negative error in case of failure.
5212+
*
5213+
* void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags)
5214+
* Description
5215+
* Submit reserved ring buffer sample, pointed to by *ptr*,
5216+
* through the dynptr interface.
5217+
*
5218+
* For more information on *flags*, please see
5219+
* 'bpf_ringbuf_submit'.
5220+
* Return
5221+
* Nothing. Always succeeds.
5222+
*
5223+
* void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags)
5224+
* Description
5225+
* Discard reserved ring buffer sample through the dynptr
5226+
* interface.
5227+
*
5228+
* For more information on *flags*, please see
5229+
* 'bpf_ringbuf_discard'.
5230+
* Return
5231+
* Nothing. Always succeeds.
52055232
*/
52065233
#define __BPF_FUNC_MAPPER(FN) \
52075234
FN(unspec), \
@@ -5404,6 +5431,9 @@ union bpf_attr {
54045431
FN(dynptr_read), \
54055432
FN(dynptr_write), \
54065433
FN(dynptr_data), \
5434+
FN(ringbuf_reserve_dynptr), \
5435+
FN(ringbuf_submit_dynptr), \
5436+
FN(ringbuf_discard_dynptr), \
54075437
/* */
54085438

54095439
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

kernel/bpf/helpers.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1580,6 +1580,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
15801580
return &bpf_ringbuf_discard_proto;
15811581
case BPF_FUNC_ringbuf_query:
15821582
return &bpf_ringbuf_query_proto;
1583+
case BPF_FUNC_ringbuf_reserve_dynptr:
1584+
return &bpf_ringbuf_reserve_dynptr_proto;
1585+
case BPF_FUNC_ringbuf_submit_dynptr:
1586+
return &bpf_ringbuf_submit_dynptr_proto;
1587+
case BPF_FUNC_ringbuf_discard_dynptr:
1588+
return &bpf_ringbuf_discard_dynptr_proto;
15831589
case BPF_FUNC_for_each_map_elem:
15841590
return &bpf_for_each_map_elem_proto;
15851591
case BPF_FUNC_loop:

kernel/bpf/ringbuf.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,3 +475,74 @@ const struct bpf_func_proto bpf_ringbuf_query_proto = {
475475
.arg1_type = ARG_CONST_MAP_PTR,
476476
.arg2_type = ARG_ANYTHING,
477477
};
478+
479+
/*
 * bpf_ringbuf_reserve_dynptr() - reserve a ring buffer record and expose it
 * through a dynptr.
 *
 * @map:   map argument, passed unchanged to ____bpf_ringbuf_reserve()
 * @size:  number of bytes to reserve; checked by bpf_dynptr_check_size() first
 * @flags: passed unchanged to ____bpf_ringbuf_reserve()
 * @ptr:   dynptr to set up; it is set to null on every failure path
 *
 * Return: 0 on success, the bpf_dynptr_check_size() error if @size is
 * rejected, or -EINVAL if the reservation yields no sample.
 */
BPF_CALL_4(bpf_ringbuf_reserve_dynptr, struct bpf_map *, map, u32, size, u64, flags,
480+
struct bpf_dynptr_kern *, ptr)
481+
{
482+
void *sample;
483+
int err;
484+
485+
/* Validate the size first; nothing has been reserved yet on this path. */
err = bpf_dynptr_check_size(size);
486+
if (err) {
487+
bpf_dynptr_set_null(ptr);
488+
return err;
489+
}
490+
491+
sample = (void *)____bpf_ringbuf_reserve(map, size, flags);
492+
493+
/* No sample was returned: hand back a null dynptr and report -EINVAL. */
if (!sample) {
494+
bpf_dynptr_set_null(ptr);
495+
return -EINVAL;
496+
}
497+
498+
/*
 * NOTE(review): the 0 is presumably the dynptr's starting offset within
 * the sample - confirm against bpf_dynptr_init().
 */
bpf_dynptr_init(ptr, sample, BPF_DYNPTR_TYPE_RINGBUF, 0, size);
499+
500+
return 0;
501+
}
502+
503+
/*
 * Helper prototype for bpf_ringbuf_reserve_dynptr(): integer return, a
 * constant map pointer as arg1, two ARG_ANYTHING arguments (size and flags
 * in the helper's signature) and a ringbuf-typed dynptr tagged MEM_UNINIT
 * as arg4.
 * NOTE(review): MEM_UNINIT presumably tells the verifier the dynptr need not
 * be initialized on entry - confirm against the verifier's dynptr checks.
 */
const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto = {
504+
.func = bpf_ringbuf_reserve_dynptr,
505+
.ret_type = RET_INTEGER,
506+
.arg1_type = ARG_CONST_MAP_PTR,
507+
.arg2_type = ARG_ANYTHING,
508+
.arg3_type = ARG_ANYTHING,
509+
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT,
510+
};
511+
512+
/*
 * bpf_ringbuf_submit_dynptr() - submit the ring buffer sample held by a dynptr.
 *
 * @ptr:   dynptr whose ->data is handed to ____bpf_ringbuf_submit()
 * @flags: passed unchanged to ____bpf_ringbuf_submit()
 *
 * Does nothing when @ptr->data is NULL. Otherwise the sample is submitted
 * and @ptr is set to null afterwards.
 *
 * Return: always 0.
 */
BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
513+
{
514+
if (!ptr->data)
515+
return 0;
516+
517+
____bpf_ringbuf_submit(ptr->data, flags);
518+
519+
/* Null the dynptr once its sample has been handed to the submit path. */
bpf_dynptr_set_null(ptr);
520+
521+
return 0;
522+
}
523+
524+
/*
 * Helper prototype for bpf_ringbuf_submit_dynptr(): RET_VOID return, a
 * ringbuf-typed dynptr tagged MEM_RELEASE as arg1 and an ARG_ANYTHING
 * flags value as arg2.
 * NOTE(review): MEM_RELEASE presumably marks arg1 as released by this call -
 * confirm against the verifier's release handling.
 */
const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto = {
525+
.func = bpf_ringbuf_submit_dynptr,
526+
.ret_type = RET_VOID,
527+
.arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_RELEASE,
528+
.arg2_type = ARG_ANYTHING,
529+
};
530+
531+
/*
 * bpf_ringbuf_discard_dynptr() - discard the ring buffer sample held by a dynptr.
 *
 * @ptr:   dynptr whose ->data is handed to ____bpf_ringbuf_discard()
 * @flags: passed unchanged to ____bpf_ringbuf_discard()
 *
 * Does nothing when @ptr->data is NULL. Otherwise the sample is discarded
 * and @ptr is set to null afterwards.
 *
 * Return: always 0.
 */
BPF_CALL_2(bpf_ringbuf_discard_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
532+
{
533+
if (!ptr->data)
534+
return 0;
535+
536+
____bpf_ringbuf_discard(ptr->data, flags);
537+
538+
/* Null the dynptr once its sample has been handed to the discard path. */
bpf_dynptr_set_null(ptr);
539+
540+
return 0;
541+
}
542+
543+
/*
 * Helper prototype for bpf_ringbuf_discard_dynptr(): RET_VOID return, a
 * ringbuf-typed dynptr tagged MEM_RELEASE as arg1 and an ARG_ANYTHING
 * flags value as arg2.
 * NOTE(review): MEM_RELEASE presumably marks arg1 as released by this call -
 * confirm against the verifier's release handling.
 */
const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto = {
544+
.func = bpf_ringbuf_discard_dynptr,
545+
.ret_type = RET_VOID,
546+
.arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_RELEASE,
547+
.arg2_type = ARG_ANYTHING,
548+
};

kernel/bpf/verifier.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,7 @@ static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
679679

680680
static int arg_to_dynptr_type(enum bpf_arg_type arg_type, enum bpf_dynptr_type *dynptr_type)
681681
{
682-
int type = arg_type & (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_MALLOC);
682+
int type = arg_type & (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_MALLOC | DYNPTR_TYPE_RINGBUF);
683683

684684
switch (type) {
685685
case DYNPTR_TYPE_LOCAL:
@@ -688,6 +688,9 @@ static int arg_to_dynptr_type(enum bpf_arg_type arg_type, enum bpf_dynptr_type *
688688
case DYNPTR_TYPE_MALLOC:
689689
*dynptr_type = BPF_DYNPTR_TYPE_MALLOC;
690690
break;
691+
case DYNPTR_TYPE_RINGBUF:
692+
*dynptr_type = BPF_DYNPTR_TYPE_RINGBUF;
693+
break;
691694
default:
692695
/* Can't have more than one type set and can't have no
693696
* type set
@@ -702,7 +705,7 @@ static bool dynptr_type_refcounted(struct bpf_func_state *state, int spi)
702705
{
703706
enum bpf_dynptr_type type = state->stack[spi].spilled_ptr.dynptr_type;
704707

705-
return type == BPF_DYNPTR_TYPE_MALLOC;
708+
return type == BPF_DYNPTR_TYPE_MALLOC || type == BPF_DYNPTR_TYPE_RINGBUF;
706709
}
707710

708711
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
@@ -5842,6 +5845,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
58425845
err_extra = "local ";
58435846
else if (arg_type & DYNPTR_TYPE_MALLOC)
58445847
err_extra = "malloc ";
5848+
else if (arg_type & DYNPTR_TYPE_RINGBUF)
5849+
err_extra = "ringbuf ";
58455850
verbose(env, "Expected an initialized %sdynptr as arg #%d\n",
58465851
err_extra, arg + 1);
58475852
return -EINVAL;
@@ -5966,7 +5971,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
59665971
case BPF_MAP_TYPE_RINGBUF:
59675972
if (func_id != BPF_FUNC_ringbuf_output &&
59685973
func_id != BPF_FUNC_ringbuf_reserve &&
5969-
func_id != BPF_FUNC_ringbuf_query)
5974+
func_id != BPF_FUNC_ringbuf_query &&
5975+
func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
5976+
func_id != BPF_FUNC_ringbuf_submit_dynptr &&
5977+
func_id != BPF_FUNC_ringbuf_discard_dynptr)
59705978
goto error;
59715979
break;
59725980
case BPF_MAP_TYPE_STACK_TRACE:
@@ -6082,6 +6090,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
60826090
case BPF_FUNC_ringbuf_output:
60836091
case BPF_FUNC_ringbuf_reserve:
60846092
case BPF_FUNC_ringbuf_query:
6093+
case BPF_FUNC_ringbuf_reserve_dynptr:
6094+
case BPF_FUNC_ringbuf_submit_dynptr:
6095+
case BPF_FUNC_ringbuf_discard_dynptr:
60856096
if (map->map_type != BPF_MAP_TYPE_RINGBUF)
60866097
goto error;
60876098
break;

tools/include/uapi/linux/bpf.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5202,6 +5202,33 @@ union bpf_attr {
52025202
* Pointer to the underlying dynptr data, NULL if the ptr is
52035203
* read-only, if the dynptr is invalid, or if the offset and length
52045204
* is out of bounds.
5205+
*
5206+
* long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr)
5207+
* Description
5208+
* Reserve *size* bytes of payload in a ring buffer *ringbuf*
5209+
* through the dynptr interface. *flags* must be 0.
5210+
* Return
5211+
* 0 on success, or a negative error in case of failure.
5212+
*
5213+
* void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags)
5214+
* Description
5215+
* Submit reserved ring buffer sample, pointed to by *ptr*,
5216+
* through the dynptr interface.
5217+
*
5218+
* For more information on *flags*, please see
5219+
* 'bpf_ringbuf_submit'.
5220+
* Return
5221+
* Nothing. Always succeeds.
5222+
*
5223+
* void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags)
5224+
* Description
5225+
* Discard reserved ring buffer sample through the dynptr
5226+
* interface.
5227+
*
5228+
* For more information on *flags*, please see
5229+
* 'bpf_ringbuf_discard'.
5230+
* Return
5231+
* Nothing. Always succeeds.
52055232
*/
52065233
#define __BPF_FUNC_MAPPER(FN) \
52075234
FN(unspec), \
@@ -5404,6 +5431,9 @@ union bpf_attr {
54045431
FN(dynptr_read), \
54055432
FN(dynptr_write), \
54065433
FN(dynptr_data), \
5434+
FN(ringbuf_reserve_dynptr), \
5435+
FN(ringbuf_submit_dynptr), \
5436+
FN(ringbuf_discard_dynptr), \
54075437
/* */
54085438

54095439
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

0 commit comments

Comments
 (0)