Skip to content

Commit

Permalink
bpf: Fix fexit trampoline.
Browse files Browse the repository at this point in the history
[ Upstream commit e21aa34 ]

The fexit/fmod_ret programs can be attached to kernel functions that can sleep.
The synchronize_rcu_tasks() will not wait for such tasks to complete.
In such case the trampoline image will be freed and when the task
wakes up the return IP will point to freed memory causing the crash.
Solve this by adding percpu_ref_get/put for the duration of trampoline
and separate trampoline vs its image life times.
The "half page" optimization has to be removed, since
first_half->second_half->first_half transition cannot be guaranteed to
complete in deterministic time. Every trampoline update becomes a new image.
The image with fmod_ret or fexit progs will be freed via percpu_ref_kill and
call_rcu_tasks. Together they will wait for the original function and
trampoline asm to complete. The trampoline is patched from nop to jmp to skip
fexit progs. They are freed independently from the trampoline. The image with
fentry progs only will be freed via call_rcu_tasks_trace+call_rcu_tasks which
will wait for both sleepable and non-sleepable progs to complete.

Fixes: fec56f5 ("bpf: Introduce BPF trampoline")
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Paul E. McKenney <paulmck@kernel.org>  # for RCU
Link: https://lore.kernel.org/bpf/20210316210007.38949-1-alexei.starovoitov@gmail.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
Alexei Starovoitov authored and gregkh committed Mar 30, 2021
1 parent 4c2d548 commit 85d177f
Show file tree
Hide file tree
Showing 5 changed files with 213 additions and 61 deletions.
26 changes: 22 additions & 4 deletions arch/x86/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1735,7 +1735,7 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
* add rsp, 8 // skip eth_type_trans's frame
* ret // return to its caller
*/
int arch_prepare_bpf_trampoline(void *image, void *image_end,
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_progs *tprogs,
void *orig_call)
Expand Down Expand Up @@ -1774,6 +1774,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,

save_regs(m, &prog, nr_args, stack_size);

if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* arg1: mov rdi, im */
emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
if (emit_call(&prog, __bpf_tramp_enter, prog)) {
ret = -EINVAL;
goto cleanup;
}
}

if (fentry->nr_progs)
if (invoke_bpf(m, &prog, fentry, stack_size))
return -EINVAL;
Expand All @@ -1792,8 +1801,7 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
}

if (flags & BPF_TRAMP_F_CALL_ORIG) {
if (fentry->nr_progs || fmod_ret->nr_progs)
restore_regs(m, &prog, nr_args, stack_size);
restore_regs(m, &prog, nr_args, stack_size);

/* call original function */
if (emit_call(&prog, orig_call, prog)) {
Expand All @@ -1802,6 +1810,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
}
/* remember return value in a stack for bpf prog to access */
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
im->ip_after_call = prog;
emit_nops(&prog, 5);
}

if (fmod_ret->nr_progs) {
Expand Down Expand Up @@ -1832,9 +1842,17 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
* the return value is only updated on the stack and still needs to be
* restored to R0.
*/
if (flags & BPF_TRAMP_F_CALL_ORIG)
if (flags & BPF_TRAMP_F_CALL_ORIG) {
im->ip_epilogue = prog;
/* arg1: mov rdi, im */
emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
if (emit_call(&prog, __bpf_tramp_exit, prog)) {
ret = -EINVAL;
goto cleanup;
}
/* restore original return value back into RAX */
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
}

EMIT1(0x5B); /* pop rbx */
EMIT1(0xC9); /* leave */
Expand Down
24 changes: 20 additions & 4 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <linux/capability.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>

struct bpf_verifier_env;
struct bpf_verifier_log;
Expand Down Expand Up @@ -563,7 +564,8 @@ struct bpf_tramp_progs {
* fentry = a set of program to run before calling original function
* fexit = a set of program to run after original function
*/
int arch_prepare_bpf_trampoline(void *image, void *image_end,
struct bpf_tramp_image;
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_progs *tprogs,
void *orig_call);
Expand All @@ -572,6 +574,8 @@ u64 notrace __bpf_prog_enter(void);
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
void notrace __bpf_prog_enter_sleepable(void);
void notrace __bpf_prog_exit_sleepable(void);
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);

struct bpf_ksym {
unsigned long start;
Expand All @@ -590,6 +594,18 @@ enum bpf_tramp_prog_type {
BPF_TRAMP_REPLACE, /* more than MAX */
};

struct bpf_tramp_image {
void *image;
struct bpf_ksym ksym;
struct percpu_ref pcref;
void *ip_after_call;
void *ip_epilogue;
union {
struct rcu_head rcu;
struct work_struct work;
};
};

struct bpf_trampoline {
/* hlist for trampoline_table */
struct hlist_node hlist;
Expand All @@ -612,9 +628,8 @@ struct bpf_trampoline {
/* Number of attached programs. A counter per kind. */
int progs_cnt[BPF_TRAMP_MAX];
/* Executable image of trampoline */
void *image;
struct bpf_tramp_image *cur_image;
u64 selector;
struct bpf_ksym ksym;
};

struct bpf_attach_target_info {
Expand Down Expand Up @@ -698,6 +713,8 @@ void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
int bpf_jit_charge_modmem(u32 pages);
void bpf_jit_uncharge_modmem(u32 pages);
#else
static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
struct bpf_trampoline *tr)
Expand Down Expand Up @@ -788,7 +805,6 @@ struct bpf_prog_aux {
bool func_proto_unreliable;
bool sleepable;
bool tail_call_reachable;
enum bpf_tramp_prog_type trampoline_prog_type;
struct hlist_node tramp_hlist;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/bpf_struct_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,

tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
err = arch_prepare_bpf_trampoline(image,
err = arch_prepare_bpf_trampoline(NULL, image,
st_map->image + PAGE_SIZE,
&st_ops->func_models[i], 0,
tprogs, NULL);
Expand Down
4 changes: 2 additions & 2 deletions kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -817,7 +817,7 @@ static int __init bpf_jit_charge_init(void)
}
pure_initcall(bpf_jit_charge_init);

static int bpf_jit_charge_modmem(u32 pages)
int bpf_jit_charge_modmem(u32 pages)
{
if (atomic_long_add_return(pages, &bpf_jit_current) >
(bpf_jit_limit >> PAGE_SHIFT)) {
Expand All @@ -830,7 +830,7 @@ static int bpf_jit_charge_modmem(u32 pages)
return 0;
}

static void bpf_jit_uncharge_modmem(u32 pages)
void bpf_jit_uncharge_modmem(u32 pages)
{
atomic_long_sub(pages, &bpf_jit_current);
}
Expand Down

0 comments on commit 85d177f

Please sign in to comment.