Skip to content

Commit db67827

Browse files
puranjaymohan authored and Kernel Patches Daemon committed
arm64, bpf: add internal-only MOV instruction to resolve per-CPU addrs
Support an instruction for resolving absolute addresses of per-CPU data from their per-CPU offsets. This instruction is internal-only and users are not allowed to use it directly. It will only be used for internal inlining optimizations for now between BPF verifier and BPF JITs.

Since commit 7158627 ("arm64: percpu: implement optimised pcpu access using tpidr_el1"), the per-cpu offset for the CPU is stored in the tpidr_el1/2 register of that CPU.

To support this BPF instruction in the ARM64 JIT, the following ARM64 instructions are emitted:

mov dst, src // Move src to dst, if src != dst
mrs tmp, tpidr_el1/2 // Move per-cpu offset of the current cpu in tmp.
add dst, dst, tmp // Add the per cpu offset to the dst.

To measure the performance improvement provided by this change, the benchmark in [1] was used:

Before:
glob-arr-inc : 23.597 ± 0.012M/s
arr-inc : 23.173 ± 0.019M/s
hash-inc : 12.186 ± 0.028M/s

After:
glob-arr-inc : 23.819 ± 0.034M/s
arr-inc : 23.285 ± 0.017M/s
hash-inc : 12.419 ± 0.011M/s

[1] anakryiko/linux@8dec900975ef

Signed-off-by: Puranjay Mohan <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
1 parent a357e12 commit db67827

File tree

4 files changed

+38
-0
lines changed

4 files changed

+38
-0
lines changed

arch/arm64/include/asm/insn.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ enum aarch64_insn_special_register {
135135
AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210
136136
};
137137

138+
/*
 * System registers that can be named as the source of an MRS instruction
 * generated by aarch64_insn_gen_mrs().
 *
 * Each value is ORed into the instruction word after a left shift by 5, so
 * it is the register's encoding as it appears from instruction bit 5 upward.
 * NOTE(review): presumably the packed o0:op1:CRn:CRm:op2 fields from the
 * Arm ARM system-register encoding -- confirm against the architecture
 * manual before adding new entries.
 */
enum aarch64_insn_system_register {
139+
AARCH64_INSN_SYSREG_TPIDR_EL1 = 0x4684,
140+
AARCH64_INSN_SYSREG_TPIDR_EL2 = 0x6682,
141+
};
142+
138143
enum aarch64_insn_variant {
139144
AARCH64_INSN_VARIANT_32BIT,
140145
AARCH64_INSN_VARIANT_64BIT
@@ -686,6 +691,8 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
686691
}
687692
#endif
688693
u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
694+
/*
 * Generate an MRS instruction that reads system register @sysreg into
 * general-purpose register @result. Returns the encoded instruction word.
 */
u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
695+
enum aarch64_insn_system_register sysreg);
689696

690697
s32 aarch64_get_branch_offset(u32 insn);
691698
u32 aarch64_set_branch_offset(u32 insn, s32 offset);

arch/arm64/lib/insn.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,3 +1515,14 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
15151515

15161516
return insn;
15171517
}
1518+
1519+
/*
 * aarch64_insn_gen_mrs() - encode "mrs <result>, <sysreg>".
 * @result: general-purpose register that receives the system register value
 * @sysreg: system register to read, one of enum aarch64_insn_system_register
 *
 * Starts from the base MRS opcode, fills in the system-register field and
 * then the destination (Rt) register field.
 *
 * Return: the encoded instruction word as produced by
 * aarch64_insn_encode_register().
 * NOTE(review): the value returned for an invalid @result is whatever that
 * helper yields -- confirm against its definition.
 */
u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
1520+
enum aarch64_insn_system_register sysreg)
1521+
{
1522+
u32 insn = aarch64_insn_get_mrs_value();
1523+
1524+
/* Clear bits 19:0 so the sysreg and Rt fields start out empty. */
insn &= ~GENMASK(19, 0);
1525+
/* The system-register encoding sits directly above bits 4:0. */
insn |= sysreg << 5;
1526+
/* Finally place the destination register in the Rt field. */
return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT,
1527+
insn, result);
1528+
}

arch/arm64/net/bpf_jit.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,4 +297,10 @@
297297
#define A64_ADR(Rd, offset) \
298298
aarch64_insn_gen_adr(0, offset, Rd, AARCH64_INSN_ADR_TYPE_ADR)
299299

300+
/*
 * MRS: read a system register into general-purpose register Rt.
 * One wrapper per supported register; both expand to
 * aarch64_insn_gen_mrs() with the matching AARCH64_INSN_SYSREG_* value.
 */
301+
#define A64_MRS_TPIDR_EL1(Rt) \
302+
aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL1)
303+
#define A64_MRS_TPIDR_EL2(Rt) \
304+
aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL2)
305+
300306
#endif /* _BPF_JIT_H */

arch/arm64/net/bpf_jit_comp.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
890890
emit(A64_ORR(1, tmp, dst, tmp), ctx);
891891
emit(A64_MOV(1, dst, tmp), ctx);
892892
break;
893+
} else if (insn_is_mov_percpu_addr(insn)) {
894+
if (dst != src)
895+
emit(A64_MOV(1, dst, src), ctx);
896+
if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
897+
emit(A64_MRS_TPIDR_EL2(tmp), ctx);
898+
else
899+
emit(A64_MRS_TPIDR_EL1(tmp), ctx);
900+
emit(A64_ADD(1, dst, dst, tmp), ctx);
901+
break;
893902
}
894903
switch (insn->off) {
895904
case 0:
@@ -2559,6 +2568,11 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
25592568
return true;
25602569
}
25612570

2571+
/*
 * Report whether this JIT can translate the internal-only MOV instruction
 * that resolves a per-CPU offset into an absolute address.
 *
 * Return: always true; build_insn() handles the instruction in its
 * insn_is_mov_percpu_addr() branch.
 */
bool bpf_jit_supports_percpu_insn(void)
2572+
{
2573+
return true;
2574+
}
2575+
25622576
void bpf_jit_free(struct bpf_prog *prog)
25632577
{
25642578
if (prog->jited) {

0 commit comments

Comments
 (0)