Bpf arm64 percpu #6929

Closed
8 changes: 8 additions & 0 deletions arch/arm64/include/asm/insn.h
@@ -135,6 +135,12 @@ enum aarch64_insn_special_register {
AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210
};

enum aarch64_insn_system_register {
AARCH64_INSN_SYSREG_TPIDR_EL1 = 0x4684,
AARCH64_INSN_SYSREG_TPIDR_EL2 = 0x6682,
AARCH64_INSN_SYSREG_SP_EL0 = 0x4208,
};

enum aarch64_insn_variant {
AARCH64_INSN_VARIANT_32BIT,
AARCH64_INSN_VARIANT_64BIT
@@ -686,6 +692,8 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
}
#endif
u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
enum aarch64_insn_system_register sysreg);

s32 aarch64_get_branch_offset(u32 insn);
u32 aarch64_set_branch_offset(u32 insn, s32 offset);
11 changes: 11 additions & 0 deletions arch/arm64/lib/insn.c
@@ -1515,3 +1515,14 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)

return insn;
}

u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
enum aarch64_insn_system_register sysreg)
{
u32 insn = aarch64_insn_get_mrs_value();

/* Clear Rt and place the 15-bit o0:op1:CRn:CRm:op2 ID at bits [19:5] */
insn &= ~GENMASK(19, 0);
insn |= sysreg << 5;
return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT,
insn, result);
}
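
Note on the encoding (not part of the diff): the enum values added in insn.h are the concatenated o0:op1:CRn:CRm:op2 operand fields of the MRS instruction, which occupy bits [19:5] of the instruction word. A minimal user-space sketch that reproduces them, assuming aarch64_insn_get_mrs_value() returns the MRS template 0xd5300000 with all operand fields zeroed:

#include <assert.h>
#include <stdint.h>

/* Assumed MRS template: sysreg and Rt fields zeroed */
#define MRS_BASE 0xd5300000u

static uint32_t sysreg_id(uint32_t o0, uint32_t op1, uint32_t crn,
			  uint32_t crm, uint32_t op2)
{
	/* 15-bit field: o0 [14], op1 [13:11], CRn [10:7], CRm [6:3], op2 [2:0] */
	return o0 << 14 | op1 << 11 | crn << 7 | crm << 3 | op2;
}

int main(void)
{
	/* TPIDR_EL1 is S3_0_C13_C0_4 */
	assert(sysreg_id(1, 0, 13, 0, 4) == 0x4684);
	/* TPIDR_EL2 is S3_4_C13_C0_2 */
	assert(sysreg_id(1, 4, 13, 0, 2) == 0x6682);
	/* SP_EL0 is S3_0_C4_C1_0 */
	assert(sysreg_id(1, 0, 4, 1, 0) == 0x4208);
	/* "mrs x0, tpidr_el1" assembles to 0xd538d080 */
	assert((MRS_BASE | sysreg_id(1, 0, 13, 0, 4) << 5 | 0) == 0xd538d080);
	return 0;
}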
8 changes: 8 additions & 0 deletions arch/arm64/net/bpf_jit.h
@@ -297,4 +297,12 @@
#define A64_ADR(Rd, offset) \
aarch64_insn_gen_adr(0, offset, Rd, AARCH64_INSN_ADR_TYPE_ADR)

/* MRS */
#define A64_MRS_TPIDR_EL1(Rt) \
aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL1)
#define A64_MRS_TPIDR_EL2(Rt) \
aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL2)
#define A64_MRS_SP_EL0(Rt) \
aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_SP_EL0)

#endif /* _BPF_JIT_H */
37 changes: 37 additions & 0 deletions arch/arm64/net/bpf_jit_comp.c
@@ -877,6 +877,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit(A64_ORR(1, tmp, dst, tmp), ctx);
emit(A64_MOV(1, dst, tmp), ctx);
break;
} else if (insn_is_mov_percpu_addr(insn)) {
if (dst != src)
emit(A64_MOV(1, dst, src), ctx);
if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
emit(A64_MRS_TPIDR_EL2(tmp), ctx);
else
emit(A64_MRS_TPIDR_EL1(tmp), ctx);
emit(A64_ADD(1, dst, dst, tmp), ctx);
break;
}
switch (insn->off) {
case 0:
@@ -1206,6 +1215,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const u8 r0 = bpf2a64[BPF_REG_0];
bool func_addr_fixed;
u64 func_addr;
u32 cpu_offset = offsetof(struct thread_info, cpu);

/* Implement helper call to bpf_get_smp_processor_id() inline */
if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {
/* SP_EL0 holds a pointer to the current task_struct */
emit(A64_MRS_SP_EL0(tmp), ctx);
if (is_lsi_offset(cpu_offset, 2)) {
emit(A64_LDR32I(r0, tmp, cpu_offset), ctx);
} else {
emit_a64_mov_i(1, tmp2, cpu_offset, ctx);
emit(A64_LDR32(r0, tmp, tmp2), ctx);
}
break;
}

ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
&func_addr, &func_addr_fixed);
@@ -2527,6 +2549,21 @@ bool bpf_jit_supports_arena(void)
return true;
}

bool bpf_jit_supports_percpu_insn(void)
{
return true;
}

bool bpf_jit_inlines_helper_call(s32 imm)
{
switch (imm) {
case BPF_FUNC_get_smp_processor_id:
return true;
}

return false;
}

void bpf_jit_free(struct bpf_prog *prog)
{
if (prog->jited) {
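For reference, the two fast paths added to build_insn() in this file should lower to short sequences; roughly (illustrative AArch64, not part of the diff):

/*
 * Per-CPU address conversion (insn_is_mov_percpu_addr), VHE system:
 *
 *	mov	dst, src		// skipped when dst == src
 *	mrs	tmp, tpidr_el2		// tpidr_el1 on non-VHE
 *	add	dst, dst, tmp		// add this CPU's per-CPU offset
 *
 * Inlined bpf_get_smp_processor_id():
 *
 *	mrs	tmp, sp_el0		// current task_struct
 *	ldr	w0, [tmp, #cpu_off]	// thread_info.cpu (thread_info is
 *					// at offset 0 of task_struct)
 */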
50 changes: 50 additions & 0 deletions arch/riscv/net/bpf_jit_comp64.c
@@ -12,6 +12,7 @@
#include <linux/stop_machine.h>
#include <asm/patch.h>
#include <asm/cfi.h>
#include <asm/percpu.h>
#include "bpf_jit.h"

#define RV_FENTRY_NINSNS 2
@@ -1089,6 +1090,24 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_or(RV_REG_T1, rd, RV_REG_T1, ctx);
emit_mv(rd, RV_REG_T1, ctx);
break;
} else if (insn_is_mov_percpu_addr(insn)) {
if (rd != rs)
emit_mv(rd, rs, ctx);
#ifdef CONFIG_SMP
/* Load current CPU number in T1 */
emit_ld(RV_REG_T1, offsetof(struct thread_info, cpu),
RV_REG_TP, ctx);
/* << 3 because offsets are 8 bytes */
emit_slli(RV_REG_T1, RV_REG_T1, 3, ctx);
/* Load address of __per_cpu_offset array in T2 */
emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx);
/* Add offset of current CPU to __per_cpu_offset */
emit_add(RV_REG_T1, RV_REG_T2, RV_REG_T1, ctx);
/* Load __per_cpu_offset[cpu] in T1 */
emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx);
/* Add the offset to Rd */
emit_add(rd, rd, RV_REG_T1, ctx);
#endif
break;
}
if (imm == 1) {
/* Special mov32 for zext */
@@ -1474,6 +1493,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
bool fixed_addr;
u64 addr;

/* Inline calls to bpf_get_smp_processor_id()
*
* RV_REG_TP holds the address of the current CPU's task_struct and thread_info is
* at offset 0 in task_struct.
* Load cpu from thread_info:
* Set R0 to ((struct thread_info *)(RV_REG_TP))->cpu
*
* This replicates the implementation of raw_smp_processor_id() on RISC-V.
*/
if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {
/* Load current CPU number in R0 */
emit_ld(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu),
RV_REG_TP, ctx);
break;
}

mark_call(ctx);
ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
&addr, &fixed_addr);
@@ -2038,3 +2073,18 @@ bool bpf_jit_supports_arena(void)
{
return true;
}

bool bpf_jit_supports_percpu_insn(void)
{
return true;
}

bool bpf_jit_inlines_helper_call(s32 imm)
{
switch (imm) {
case BPF_FUNC_get_smp_processor_id:
return true;
}

return false;
}
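
For reference, the per-CPU address conversion added above should lower to roughly the following (illustrative RV64 sequence, not part of the diff; tp points at the current task_struct, whose first member is thread_info):

/*
 *	mv	dst, src		# skipped when dst == src
 *	ld	t1, <cpu_off>(tp)	# thread_info.cpu
 *	slli	t1, t1, 3		# scale: __per_cpu_offset[] entries are 8 bytes
 *	<t2 = &__per_cpu_offset>	# via emit_addr()
 *	add	t1, t2, t1
 *	ld	t1, 0(t1)		# __per_cpu_offset[cpu]
 *	add	dst, dst, t1
 */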
1 change: 1 addition & 0 deletions include/linux/filter.h
@@ -993,6 +993,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
bool bpf_jit_needs_zext(void);
bool bpf_jit_inlines_helper_call(s32 imm);
bool bpf_jit_supports_subprog_tailcalls(void);
bool bpf_jit_supports_percpu_insn(void);
bool bpf_jit_supports_kfunc_call(void);
11 changes: 11 additions & 0 deletions kernel/bpf/core.c
@@ -2941,6 +2941,17 @@ bool __weak bpf_jit_needs_zext(void)
return false;
}

/* Return true if the JIT inlines the call to the helper corresponding to
* the imm.
*
* The verifier will not patch the insn->imm for the call to the helper if
* this returns true.
*/
bool __weak bpf_jit_inlines_helper_call(s32 imm)
{
return false;
}

/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */
bool __weak bpf_jit_supports_subprog_tailcalls(void)
{
2 changes: 2 additions & 0 deletions kernel/bpf/verifier.c
@@ -20020,6 +20020,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto next_insn;
}

if (bpf_jit_inlines_helper_call(insn->imm))
goto next_insn;
if (insn->imm == BPF_FUNC_get_route_realm)
prog->dst_needed = 1;
if (insn->imm == BPF_FUNC_get_prandom_u32)
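For context, the guard added above matches plain helper calls before the verifier rewrites them. A sketch of the instruction shape involved (uapi layout; the src_reg == 0 / imm == BPF_FUNC_* convention is the same one the JITs test):

/* A helper call as seen by do_misc_fixups() */
struct bpf_insn call = {
	.code    = BPF_JMP | BPF_CALL,
	.src_reg = 0,			/* 0 = helper, not a subprog/kfunc call */
	.imm     = BPF_FUNC_get_smp_processor_id,
};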
9 changes: 9 additions & 0 deletions tools/testing/selftests/bpf/bench.c
@@ -512,6 +512,10 @@ extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_tp;
extern const struct bench bench_trig_rawtp;

extern const struct bench bench_trig_arr_inc;
extern const struct bench bench_trig_hash_inc;
extern const struct bench bench_trig_glob_arr_inc;

/* uprobe/uretprobe benchmarks */
extern const struct bench bench_trig_uprobe_nop;
extern const struct bench bench_trig_uretprobe_nop;
@@ -566,6 +570,11 @@ static const struct bench *benchs[] = {
&bench_trig_fmodret,
&bench_trig_tp,
&bench_trig_rawtp,

&bench_trig_arr_inc,
&bench_trig_hash_inc,
&bench_trig_glob_arr_inc,

/* uprobes */
&bench_trig_uprobe_nop,
&bench_trig_uretprobe_nop,
31 changes: 31 additions & 0 deletions tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -225,6 +225,33 @@ static void trigger_fentry_setup(void)
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}

static void trigger_arr_inc_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
bpf_program__set_autoload(ctx.skel->progs.trigger_arr_inc, true);
load_ctx();
ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_arr_inc);
}

static void trigger_hash_inc_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
bpf_program__set_autoload(ctx.skel->progs.trigger_hash_inc, true);
load_ctx();
ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_hash_inc);
}

static void trigger_glob_arr_inc_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
bpf_program__set_autoload(ctx.skel->progs.trigger_glob_arr_inc, true);
load_ctx();
ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_glob_arr_inc);
}

static void trigger_fexit_setup(void)
{
setup_ctx();
@@ -435,6 +462,10 @@ BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
BENCH_TRIG_KERNEL(rawtp, "rawtp");

BENCH_TRIG_KERNEL(arr_inc, "arr-inc");
BENCH_TRIG_KERNEL(hash_inc, "hash-inc");
BENCH_TRIG_KERNEL(glob_arr_inc, "glob-arr-inc");

/* uprobe benchmarks */
#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \
const struct bench bench_trig_##KIND = { \
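Assuming the usual bench harness conventions (BENCH_TRIG_KERNEL registers each benchmark under a "trig-" prefix), the new benchmarks would be run as:

$ ./bench trig-arr-inc
$ ./bench trig-hash-inc
$ ./bench trig-glob-arr-inc

Comparing the numbers with and without the JIT inlining bpf_get_smp_processor_id() is one way to measure the effect of this series.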
83 changes: 83 additions & 0 deletions tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -25,6 +25,50 @@ static __always_inline void inc_counter(void)
__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
}

static __always_inline void inc_counter2(int amount)
{
int cpu = bpf_get_smp_processor_id();

__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, amount);
}

struct {
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
__type(key, int);
__type(value, int);
__uint(max_entries, 1);
} hash_map SEC(".maps");

struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__type(key, int);
__type(value, int);
__uint(max_entries, 1);
} array_map SEC(".maps");

static int zero = 0;

static __always_inline void hash_inc(void *map)
{
int *p;

p = bpf_map_lookup_elem(map, &zero);
if (!p) {
bpf_map_update_elem(map, &zero, &zero, BPF_ANY);
p = bpf_map_lookup_elem(map, &zero);
if (!p)
return;
}
*p += 1;
}

struct counter arr[256];

static __always_inline void glob_arr_inc(void)
{
int cpu = bpf_get_smp_processor_id();

arr[cpu].value += 1;
}

SEC("?uprobe")
int bench_trigger_uprobe(void *ctx)
{
@@ -34,6 +78,45 @@ int bench_trigger_uprobe(void *ctx)

const volatile int batch_iters = 0;

SEC("?raw_tp")
int trigger_arr_inc(void *ctx)
{
int i;

for (i = 0; i < batch_iters; i++)
hash_inc(&array_map);

inc_counter2(batch_iters);

return 0;
}

SEC("?raw_tp")
int trigger_hash_inc(void *ctx)
{
int i;

for (i = 0; i < batch_iters; i++)
hash_inc(&hash_map);

inc_counter2(batch_iters);

return 0;
}

SEC("?raw_tp")
int trigger_glob_arr_inc(void *ctx)
{
int i;

for (i = 0; i < batch_iters; i++)
glob_arr_inc();

inc_counter2(batch_iters);

return 0;
}

SEC("?raw_tp")
int trigger_count(void *ctx)
{