bpf: Inline calls to bpf_loop when callback is known
Calls to `bpf_loop` are replaced with direct loops to avoid
indirection. E.g. the following:
bpf_loop(10, foo, NULL, 0);
is replaced by the equivalent of the following:
for (int i = 0; i < 10; ++i)
foo(i, NULL);
This transformation can be applied when:
- callback is known and does not change during program execution;
- flags passed to `bpf_loop` are always zero.
Inlining logic works as follows:
- During execution simulation, the function `update_loop_inline_state`
tracks the following information for each `bpf_loop` call
instruction:
- is callback known and constant?
- are flags constant and zero?
- Function `optimize_bpf_loop` increases stack depth for functions
where `bpf_loop` calls can be inlined and invokes `inline_bpf_loop`
to apply the inlining. The additional stack space is used to spill
registers R6, R7 and R8. These registers are used as the loop counter,
the maximal loop bound and the callback context parameter.
Measurements using `benchs/run_bench_bpf_loop.sh` inside QEMU / KVM on
i7-4710HQ CPU show a drop in latency from 14 ns/op to 2 ns/op.
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/r/20220620235344.569325-4-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
7a42008ca5
commit
1ade237119
4 changed files with 195 additions and 9 deletions
|
|
@ -1286,6 +1286,9 @@ struct bpf_array {
|
|||
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
|
||||
#define MAX_TAIL_CALL_CNT 33
|
||||
|
||||
/* Maximum number of loops for bpf_loop */
|
||||
#define BPF_MAX_LOOPS BIT(23)
|
||||
|
||||
#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \
|
||||
BPF_F_RDONLY_PROG | \
|
||||
BPF_F_WRONLY | \
|
||||
|
|
|
|||
|
|
@ -344,6 +344,14 @@ struct bpf_verifier_state_list {
|
|||
int miss_cnt, hit_cnt;
|
||||
};
|
||||
|
||||
struct bpf_loop_inline_state {
|
||||
int initialized:1; /* set to true upon first entry */
|
||||
int fit_for_inline:1; /* true if callback function is the same
|
||||
* at each call and flags are always zero
|
||||
*/
|
||||
u32 callback_subprogno; /* valid when fit_for_inline is true */
|
||||
};
|
||||
|
||||
/* Possible states for alu_state member. */
|
||||
#define BPF_ALU_SANITIZE_SRC (1U << 0)
|
||||
#define BPF_ALU_SANITIZE_DST (1U << 1)
|
||||
|
|
@ -373,6 +381,10 @@ struct bpf_insn_aux_data {
|
|||
u32 mem_size; /* mem_size for non-struct typed var */
|
||||
};
|
||||
} btf_var;
|
||||
/* if instruction is a call to bpf_loop this field tracks
|
||||
* the state of the relevant registers to make decision about inlining
|
||||
*/
|
||||
struct bpf_loop_inline_state loop_inline_state;
|
||||
};
|
||||
u64 map_key_state; /* constant (32 bit) key tracking for maps */
|
||||
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue