
Commit b4acc47

Hou Tao authored and Nobody committed
bpf, arm64: support more atomic operations
The "Atomics for eBPF" patch series added support for atomic[64]_fetch_add, atomic[64]_[fetch_]{and,or,xor} and atomic[64]_{xchg|cmpxchg}, but only for x86-64, so support these atomic operations for arm64 as well.

The implementation is basically a mechanical translation of the code snippets in atomic_ll_sc.h, atomic_lse.h and cmpxchg.h under arch/arm64/include/asm.

When LSE atomics are unavailable, an extra temporary register is needed for (BPF_ADD | BPF_FETCH) to preserve the value of the src register; instead of adding TMP_REG_4, just reuse BPF_REG_AX. Also make emit_lse_atomic() an empty inline function when CONFIG_ARM64_LSE_ATOMICS is disabled.

For both the cpus_have_cap(ARM64_HAS_LSE_ATOMICS) case and the no-LSE-atomics case, the following three tests were exercised and passed: "./test_verifier", "./test_progs -t atomic" and "insmod ./test_bpf.ko".

Signed-off-by: Hou Tao <[email protected]>
1 parent 15d268a commit b4acc47
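
For context (this snippet is not part of the commit): the BPF_ATOMIC instructions that this JIT change handles are typically produced by Clang from the __sync_* builtins when a BPF program is compiled with -mcpu=v3, which is how the "./test_progs -t atomic" selftests mentioned above exercise them. A minimal illustrative sketch of such a program follows; the section name, attach point and variable names are made up for the example:

// SPDX-License-Identifier: GPL-2.0
/* Illustrative only: atomics that compile (clang -target bpf -mcpu=v3)
 * into the BPF_ATOMIC instructions this commit JITs on arm64.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

__u64 add64_value = 1;
__u64 and64_value = 0x0f0f0f0f0f0f0f0f;
__u64 xchg64_value = 1;
__u64 cmpxchg64_value = 1;
__u64 add64_fetched;

SEC("raw_tp/sys_enter")
int atomics_demo(const void *ctx)
{
        /* BPF_ADD | BPF_FETCH: returns the old value */
        add64_fetched = __sync_fetch_and_add(&add64_value, 2);
        /* BPF_AND; Clang typically drops FETCH when the old value is unused */
        __sync_fetch_and_and(&and64_value, 0x00ff00ff00ff00ff);
        /* BPF_XCHG */
        xchg64_value = __sync_lock_test_and_set(&xchg64_value, 4);
        /* BPF_CMPXCHG: expected old value in, old value returned (via R0) */
        cmpxchg64_value = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2);
        return 0;
}

char _license[] SEC("license") = "GPL";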

File tree

2 files changed, +217 -45 lines changed

arch/arm64/net/bpf_jit.h

Lines changed: 35 additions & 4 deletions
@@ -88,17 +88,42 @@
 /* [Rn] = Rt; (atomic) Rs = [state] */
 #define A64_STXR(sf, Rt, Rn, Rs) \
         A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
+/* [Rn] = Rt (store release); (atomic) Rs = [state] */
+#define A64_STLXR(sf, Rt, Rn, Rs) \
+        aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \
+                                       AARCH64_INSN_LDST_STORE_REL_EX)
 
 /*
  * LSE atomics
  *
- * STADD is simply encoded as an alias for LDADD with XZR as
- * the destination register.
+ * ST{ADD,CLR,SET,EOR} is simply encoded as an alias for
+ * LDD{ADD,CLR,SET,EOR} with XZR as the destination register.
  */
-#define A64_STADD(sf, Rn, Rs) \
+#define A64_ST_OP(sf, Rn, Rs, op) \
         aarch64_insn_gen_atomic_ld_op(A64_ZR, Rn, Rs, \
-                A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_ADD, \
+                A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_##op, \
                 AARCH64_INSN_MEM_ORDER_NONE)
+/* [Rn] <op>= Rs */
+#define A64_STADD(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, ADD)
+#define A64_STCLR(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, CLR)
+#define A64_STEOR(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, EOR)
+#define A64_STSET(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, SET)
+
+#define A64_LD_OP_AL(sf, Rt, Rn, Rs, op) \
+        aarch64_insn_gen_atomic_ld_op(Rt, Rn, Rs, \
+                A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_##op, \
+                AARCH64_INSN_MEM_ORDER_ACQREL)
+/* Rt = [Rn] (load acquire); [Rn] <op>= Rs (store release) */
+#define A64_LDADDAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, ADD)
+#define A64_LDCLRAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, CLR)
+#define A64_LDEORAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, EOR)
+#define A64_LDSETAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, SET)
+/* Rt = [Rn] (load acquire); [Rn] = Rs (store release) */
+#define A64_SWPAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, SWP)
+/* Rs = CAS(Rn, Rs, Rt) (load acquire & store release) */
+#define A64_CASAL(sf, Rt, Rn, Rs) \
+        aarch64_insn_gen_cas(Rt, Rn, Rs, A64_SIZE(sf), \
+                             AARCH64_INSN_MEM_ORDER_ACQREL)
 
 /* Add/subtract (immediate) */
 #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
@@ -203,6 +228,9 @@
 #define A64_ANDS(sf, Rd, Rn, Rm) A64_LOGIC_SREG(sf, Rd, Rn, Rm, AND_SETFLAGS)
 /* Rn & Rm; set condition flags */
 #define A64_TST(sf, Rn, Rm) A64_ANDS(sf, A64_ZR, Rn, Rm)
+/* Rd = ~Rm (alias of ORN with A64_ZR as Rn) */
+#define A64_MVN(sf, Rd, Rm)  \
+        A64_LOGIC_SREG(sf, Rd, A64_ZR, Rm, ORN)
 
 /* Logical (immediate) */
 #define A64_LOGIC_IMM(sf, Rd, Rn, imm, type) ({ \
@@ -226,4 +254,7 @@
 #define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ)
 #define A64_BTI_JC A64_HINT(AARCH64_INSN_HINT_BTIJC)
 
+/* DMB */
+#define A64_DMB_ISH aarch64_insn_gen_dmb(AARCH64_INSN_MB_ISH)
+
 #endif /* _BPF_JIT_H */
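
One non-obvious detail, implied by the diff rather than spelled out in it: the LSE group has no atomic AND instruction, only an atomic bit-clear (LDCLR/STCLR), which clears the bits that are set in the operand. That is why A64_MVN is added above and why the JIT (in the next file) emits MVN of the source followed by STCLR/LDCLRAL for the BPF_AND cases, relying on the identity x & m == clr(x, ~m). A tiny stand-alone C check of that identity, purely illustrative:

#include <assert.h>
#include <stdint.h>

/* clr(x, mask) models arm64 LDCLR/STCLR: clear in x every bit set in mask */
static uint64_t clr(uint64_t x, uint64_t mask)
{
        return x & ~mask;
}

int main(void)
{
        uint64_t x = 0xf0f0f0f0f0f0f0f0ull;
        uint64_t m = 0x0ff00ff00ff00ff0ull;

        /* atomic AND == "clear the bits NOT in the mask", hence MVN + STCLR */
        assert((x & m) == clr(x, ~m));
        return 0;
}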

arch/arm64/net/bpf_jit_comp.c

Lines changed: 182 additions & 41 deletions
@@ -27,6 +27,17 @@
 #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
 
+#define check_imm(bits, imm) do {                               \
+        if ((((imm) > 0) && ((imm) >> (bits))) ||               \
+            (((imm) < 0) && (~(imm) >> (bits)))) {              \
+                pr_info("[%2d] imm=%d(0x%x) out of range\n",    \
+                        i, imm, imm);                           \
+                return -EINVAL;                                 \
+        }                                                       \
+} while (0)
+#define check_imm19(imm) check_imm(19, imm)
+#define check_imm26(imm) check_imm(26, imm)
+
 /* Map BPF registers to A64 registers */
 static const int bpf2a64[] = {
         /* return value from in-kernel function, and exit value from eBPF */
@@ -329,6 +340,170 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 #undef jmp_offset
 }
 
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+        const u8 code = insn->code;
+        const u8 dst = bpf2a64[insn->dst_reg];
+        const u8 src = bpf2a64[insn->src_reg];
+        const u8 tmp = bpf2a64[TMP_REG_1];
+        const u8 tmp2 = bpf2a64[TMP_REG_2];
+        const bool isdw = BPF_SIZE(code) == BPF_DW;
+        const s16 off = insn->off;
+        u8 reg;
+
+        if (!off) {
+                reg = dst;
+        } else {
+                emit_a64_mov_i(1, tmp, off, ctx);
+                emit(A64_ADD(1, tmp, tmp, dst), ctx);
+                reg = tmp;
+        }
+
+        switch (insn->imm) {
+        /* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
+        case BPF_ADD:
+                emit(A64_STADD(isdw, reg, src), ctx);
+                break;
+        case BPF_AND:
+                emit(A64_MVN(isdw, tmp2, src), ctx);
+                emit(A64_STCLR(isdw, reg, tmp2), ctx);
+                break;
+        case BPF_OR:
+                emit(A64_STSET(isdw, reg, src), ctx);
+                break;
+        case BPF_XOR:
+                emit(A64_STEOR(isdw, reg, src), ctx);
+                break;
+        /* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
+        case BPF_ADD | BPF_FETCH:
+                emit(A64_LDADDAL(isdw, src, reg, src), ctx);
+                break;
+        case BPF_AND | BPF_FETCH:
+                emit(A64_MVN(isdw, tmp2, src), ctx);
+                emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
+                break;
+        case BPF_OR | BPF_FETCH:
+                emit(A64_LDSETAL(isdw, src, reg, src), ctx);
+                break;
+        case BPF_XOR | BPF_FETCH:
+                emit(A64_LDEORAL(isdw, src, reg, src), ctx);
+                break;
+        /* src_reg = atomic_xchg(dst_reg + off, src_reg); */
+        case BPF_XCHG:
+                emit(A64_SWPAL(isdw, src, reg, src), ctx);
+                break;
+        /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
+        case BPF_CMPXCHG:
+                emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
+                break;
+        default:
+                pr_err_once("unknown atomic op code %02x\n", insn->imm);
+                return -EINVAL;
+        }
+
+        return 0;
+}
+#else
+static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+        return -EINVAL;
+}
+#endif
+
+static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+        const u8 code = insn->code;
+        const u8 dst = bpf2a64[insn->dst_reg];
+        const u8 src = bpf2a64[insn->src_reg];
+        const u8 tmp = bpf2a64[TMP_REG_1];
+        const u8 tmp2 = bpf2a64[TMP_REG_2];
+        const u8 tmp3 = bpf2a64[TMP_REG_3];
+        const int i = insn - ctx->prog->insnsi;
+        const s32 imm = insn->imm;
+        const s16 off = insn->off;
+        const bool isdw = BPF_SIZE(code) == BPF_DW;
+        u8 reg;
+        s32 jmp_offset;
+
+        if (!off) {
+                reg = dst;
+        } else {
+                emit_a64_mov_i(1, tmp, off, ctx);
+                emit(A64_ADD(1, tmp, tmp, dst), ctx);
+                reg = tmp;
+        }
+
+        if (imm == BPF_ADD || imm == BPF_AND ||
+            imm == BPF_OR || imm == BPF_XOR) {
+                /* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
+                emit(A64_LDXR(isdw, tmp2, reg), ctx);
+                if (imm == BPF_ADD)
+                        emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
+                else if (imm == BPF_AND)
+                        emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
+                else if (imm == BPF_OR)
+                        emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
+                else
+                        emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
+                emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
+                jmp_offset = -3;
+                check_imm19(jmp_offset);
+                emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+        } else if (imm == (BPF_ADD | BPF_FETCH) ||
+                   imm == (BPF_AND | BPF_FETCH) ||
+                   imm == (BPF_OR | BPF_FETCH) ||
+                   imm == (BPF_XOR | BPF_FETCH)) {
+                /* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
+                const u8 ax = bpf2a64[BPF_REG_AX];
+
+                emit(A64_MOV(isdw, ax, src), ctx);
+                emit(A64_LDXR(isdw, src, reg), ctx);
+                if (imm == (BPF_ADD | BPF_FETCH))
+                        emit(A64_ADD(isdw, tmp2, src, ax), ctx);
+                else if (imm == (BPF_AND | BPF_FETCH))
+                        emit(A64_AND(isdw, tmp2, src, ax), ctx);
+                else if (imm == (BPF_OR | BPF_FETCH))
+                        emit(A64_ORR(isdw, tmp2, src, ax), ctx);
+                else
+                        emit(A64_EOR(isdw, tmp2, src, ax), ctx);
+                emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
+                jmp_offset = -3;
+                check_imm19(jmp_offset);
+                emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+                emit(A64_DMB_ISH, ctx);
+        } else if (imm == BPF_XCHG) {
+                /* src_reg = atomic_xchg(dst_reg + off, src_reg); */
+                emit(A64_MOV(isdw, tmp2, src), ctx);
+                emit(A64_LDXR(isdw, src, reg), ctx);
+                emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
+                jmp_offset = -2;
+                check_imm19(jmp_offset);
+                emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+                emit(A64_DMB_ISH, ctx);
+        } else if (imm == BPF_CMPXCHG) {
+                /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
+                const u8 r0 = bpf2a64[BPF_REG_0];
+
+                emit(A64_MOV(isdw, tmp2, r0), ctx);
+                emit(A64_LDXR(isdw, r0, reg), ctx);
+                emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
+                jmp_offset = 4;
+                check_imm19(jmp_offset);
+                emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
+                emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
+                jmp_offset = -4;
+                check_imm19(jmp_offset);
+                emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+                emit(A64_DMB_ISH, ctx);
+        } else {
+                pr_err_once("unknown atomic op code %02x\n", imm);
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
 static void build_epilogue(struct jit_ctx *ctx)
 {
         const u8 r0 = bpf2a64[BPF_REG_0];
@@ -434,29 +609,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
         const u8 src = bpf2a64[insn->src_reg];
         const u8 tmp = bpf2a64[TMP_REG_1];
         const u8 tmp2 = bpf2a64[TMP_REG_2];
-        const u8 tmp3 = bpf2a64[TMP_REG_3];
         const s16 off = insn->off;
         const s32 imm = insn->imm;
         const int i = insn - ctx->prog->insnsi;
         const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
                           BPF_CLASS(code) == BPF_JMP;
-        const bool isdw = BPF_SIZE(code) == BPF_DW;
-        u8 jmp_cond, reg;
+        u8 jmp_cond;
         s32 jmp_offset;
         u32 a64_insn;
         int ret;
 
-#define check_imm(bits, imm) do {                               \
-        if ((((imm) > 0) && ((imm) >> (bits))) ||               \
-            (((imm) < 0) && (~(imm) >> (bits)))) {              \
-                pr_info("[%2d] imm=%d(0x%x) out of range\n",    \
-                        i, imm, imm);                           \
-                return -EINVAL;                                 \
-        }                                                       \
-} while (0)
-#define check_imm19(imm) check_imm(19, imm)
-#define check_imm26(imm) check_imm(26, imm)
-
         switch (code) {
         /* dst = src */
         case BPF_ALU | BPF_MOV | BPF_X:
@@ -891,33 +1053,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 
         case BPF_STX | BPF_ATOMIC | BPF_W:
         case BPF_STX | BPF_ATOMIC | BPF_DW:
-                if (insn->imm != BPF_ADD) {
-                        pr_err_once("unknown atomic op code %02x\n", insn->imm);
-                        return -EINVAL;
-                }
-
-                /* STX XADD: lock *(u32 *)(dst + off) += src
-                 * and
-                 * STX XADD: lock *(u64 *)(dst + off) += src
-                 */
-
-                if (!off) {
-                        reg = dst;
-                } else {
-                        emit_a64_mov_i(1, tmp, off, ctx);
-                        emit(A64_ADD(1, tmp, tmp, dst), ctx);
-                        reg = tmp;
-                }
-                if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) {
-                        emit(A64_STADD(isdw, reg, src), ctx);
-                } else {
-                        emit(A64_LDXR(isdw, tmp2, reg), ctx);
-                        emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
-                        emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
-                        jmp_offset = -3;
-                        check_imm19(jmp_offset);
-                        emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
-                }
+                if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+                        ret = emit_lse_atomic(insn, ctx);
+                else
+                        ret = emit_ll_sc_atomic(insn, ctx);
+                if (ret)
+                        return ret;
                 break;
 
         default:
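
For readers unfamiliar with the exclusive-load/store fallback used when LSE atomics are unavailable, the sequence emit_ll_sc_atomic() generates for a 64-bit atomic fetch-add mirrors arch/arm64/include/asm/atomic_ll_sc.h: LDXR, the ALU op, STLXR with a retry branch on failure, then DMB ISH. A user-space sketch of that instruction pattern, assuming an aarch64 toolchain (the function name and types are invented for the example; this is not kernel code):

#include <stdio.h>

/* Roughly the pattern the JIT emits for BPF_ADD | BPF_FETCH without LSE:
 * ldxr / add / stlxr / cbnz (retry) / dmb ish. Returns the old value.
 */
static unsigned long long ll_sc_fetch_add64(unsigned long long *addr,
                                            unsigned long long val)
{
        unsigned long long old, sum;
        unsigned int fail;

        asm volatile(
        "1:     ldxr    %[old], %[mem]\n"
        "       add     %[sum], %[old], %[val]\n"
        "       stlxr   %w[fail], %[sum], %[mem]\n"
        "       cbnz    %w[fail], 1b\n"
        "       dmb     ish\n"
        : [old] "=&r" (old), [sum] "=&r" (sum),
          [fail] "=&r" (fail), [mem] "+Q" (*addr)
        : [val] "r" (val)
        : "memory");

        return old;
}

int main(void)
{
        unsigned long long v = 40;
        unsigned long long old = ll_sc_fetch_add64(&v, 2);

        printf("old=%llu new=%llu\n", old, v);
        return 0;
}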
