From ff42bb9fe3091d996c763848afa3e57c2a780217 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:06 -0700 Subject: [PATCH 1/9] nfp: bpf: add helper for emitting nops The need to emitting a few nops will become more common soon as we add stack and map support. Add a helper. This allows for code to be shorter but also may be handy for marking the nops with a "reason" to ease applying optimizations. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 23fb11a41cc4..eb8c905936ac 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -494,6 +494,12 @@ static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) return tmp_reg; } +static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count) +{ + while (count--) + emit_nop(nfp_prog); +} + static void wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_special special) @@ -1799,7 +1805,7 @@ static void nfp_outro(struct nfp_prog *nfp_prog) static int nfp_translate(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta; - int i, err; + int err; nfp_intro(nfp_prog); if (nfp_prog->error) @@ -1831,8 +1837,7 @@ static int nfp_translate(struct nfp_prog *nfp_prog) if (nfp_prog->error) return nfp_prog->error; - for (i = 0; i < NFP_USTORE_PREFETCH_WINDOW; i++) - emit_nop(nfp_prog); + wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW); if (nfp_prog->error) return nfp_prog->error; From 70c78fc138b6d0ef76d9920034e25082dd3a36ac Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:07 -0700 Subject: [PATCH 2/9] nfp: bpf: refactor nfp_bpf_check_ptr() nfp_bpf_check_ptr() mostly looks at the pointer register. Add a temporary variable to shorten the code. While at it make sure we print error messages if translation fails to help users identify the problem (to be carried in ext_ack in due course). Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/bpf/verifier.c | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index e361c0e3b788..4d2ed84a82e0 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -113,17 +113,23 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, static int nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - const struct bpf_verifier_env *env, u8 reg) + const struct bpf_verifier_env *env, u8 reg_no) { - if (env->cur_state.regs[reg].type != PTR_TO_CTX && - env->cur_state.regs[reg].type != PTR_TO_PACKET) - return -EINVAL; + const struct bpf_reg_state *reg = &env->cur_state.regs[reg_no]; - if (meta->ptr.type != NOT_INIT && - meta->ptr.type != env->cur_state.regs[reg].type) + if (reg->type != PTR_TO_CTX && + reg->type != PTR_TO_PACKET) { + pr_info("unsupported ptr type: %d\n", reg->type); return -EINVAL; + } - meta->ptr = env->cur_state.regs[reg]; + if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { + pr_info("ptr type changed for instruction %d -> %d\n", + meta->ptr.type, reg->type); + return -EINVAL; + } + + meta->ptr = *reg; return 0; } From ee9133a845fe8ad15f989e29bf8e2c8abe7986b8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:08 -0700 Subject: [PATCH 3/9] nfp: bpf: add stack write support Stack is implemented by the LMEM register file. Unaligned accesses to LMEM are not allowed. Accesses also have to be 4B wide. To support stack we need to make sure offsets of pointers are known at translation time (for now) and perform correct load/mask/shift operations. Since we can access first 64B of LMEM without much effort support only stacks not bigger than 64B. Following commits will extend the possible sizes beyond that. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 105 ++++++++++++++++++ drivers/net/ethernet/netronome/nfp/bpf/main.h | 3 + .../net/ethernet/netronome/nfp/bpf/offload.c | 14 +++ .../net/ethernet/netronome/nfp/bpf/verifier.c | 30 ++++- 4 files changed, 147 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index eb8c905936ac..d2a3e9065dbe 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -642,6 +642,100 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, return 0; } +typedef int +(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, + unsigned int size); + +static int +wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, + unsigned int size) +{ + u32 idx, dst_byte; + enum shf_sc sc; + swreg reg; + int shf; + u8 mask; + + if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4)) + return -EOPNOTSUPP; + + idx = off / 4; + + /* Move the entire word */ + if (size == 4) { + wrp_mov(nfp_prog, reg_lm(0, idx), reg_b(src)); + return 0; + } + + dst_byte = off % 4; + + mask = (1 << size) - 1; + mask <<= dst_byte; + + if (WARN_ON_ONCE(mask > 0xf)) + return -EOPNOTSUPP; + + shf = abs(src_byte - dst_byte) * 8; + if (src_byte == dst_byte) { + sc = SHF_SC_NONE; + } else if (src_byte < dst_byte) { + shf = 32 - shf; + sc = SHF_SC_L_SHF; + } else { + sc = SHF_SC_R_SHF; + } + + /* ld_field can address fewer indexes, if offset too large do RMW. + * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. + */ + if (idx <= RE_REG_LM_IDX_MAX) { + reg = reg_lm(0, idx); + } else { + reg = imm_a(nfp_prog); + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + } + + emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf); + + if (idx > RE_REG_LM_IDX_MAX) + wrp_mov(nfp_prog, reg_lm(0, idx), reg); + + return 0; +} + +static int +mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size, u8 gpr, lmem_step step) +{ + s32 off = nfp_prog->stack_depth + meta->insn.off; + u32 gpr_byte = 0; + int ret; + + while (size) { + u32 slice_end; + u8 slice_size; + + slice_size = min(size, 4 - gpr_byte); + slice_end = min(off + slice_size, round_up(off + 1, 4)); + slice_size = slice_end - off; + + ret = step(nfp_prog, gpr, gpr_byte, off, slice_size); + if (ret) + return ret; + + gpr_byte += slice_size; + if (gpr_byte >= 4) { + gpr_byte -= 4; + gpr++; + } + + size -= slice_size; + off += slice_size; + } + + return 0; +} + static void wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) { @@ -1298,6 +1392,14 @@ mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, meta->insn.src_reg * 2, size); } +static int +mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + return mem_op_stack(nfp_prog, meta, size, meta->insn.src_reg * 2, + wrp_lmem_store); +} + static int mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size) @@ -1305,6 +1407,9 @@ mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, if (meta->ptr.type == PTR_TO_PACKET) return mem_stx_data(nfp_prog, meta, size); + if (meta->ptr.type == PTR_TO_STACK) + return mem_stx_stack(nfp_prog, meta, size); + return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index d77e88a45409..a31632681e79 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -151,6 +151,7 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) * @tgt_done: jump target to get the next packet * @n_translated: number of successfully translated instructions (for errors) * @error: error code if something went wrong + * @stack_depth: max stack depth from the verifier * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) */ struct nfp_prog { @@ -171,6 +172,8 @@ struct nfp_prog { unsigned int n_translated; int error; + unsigned int stack_depth; + struct list_head insns; }; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index a88bb5bc0082..f215abcbc18e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -146,6 +146,7 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, { unsigned int code_sz = max_instr * sizeof(u64); enum nfp_bpf_action_type act; + unsigned int stack_size; u16 start_off, done_off; unsigned int max_mtu; int ret; @@ -167,6 +168,19 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + if (cls_bpf->prog->aux->stack_depth > 64) { + nn_info(nn, "large stack not supported: program %dB > 64B\n", + cls_bpf->prog->aux->stack_depth); + return -EOPNOTSUPP; + } + + stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; + if (cls_bpf->prog->aux->stack_depth > stack_size) { + nn_info(nn, "stack too large: program %dB > FW stack %dB\n", + cls_bpf->prog->aux->stack_depth, stack_size); + return -EOPNOTSUPP; + } + *code = dma_zalloc_coherent(nn->dp.dev, code_sz, dma_addr, GFP_KERNEL); if (!*code) return -ENOMEM; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 4d2ed84a82e0..376d9938b823 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -111,18 +111,41 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, return 0; } +static int nfp_bpf_check_stack_access(const struct bpf_reg_state *reg) +{ + if (!tnum_is_const(reg->var_off)) { + pr_info("variable ptr stack access\n"); + return -EINVAL; + } + + if (reg->var_off.value || reg->off) { + pr_info("stack access via modified register\n"); + return -EINVAL; + } + + return 0; +} + static int nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, const struct bpf_verifier_env *env, u8 reg_no) { const struct bpf_reg_state *reg = &env->cur_state.regs[reg_no]; + int err; if (reg->type != PTR_TO_CTX && + reg->type != PTR_TO_STACK && reg->type != PTR_TO_PACKET) { pr_info("unsupported ptr type: %d\n", reg->type); return -EINVAL; } + if (reg->type == PTR_TO_STACK) { + err = nfp_bpf_check_stack_access(reg); + if (err) + return err; + } + if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { pr_info("ptr type changed for instruction %d -> %d\n", meta->ptr.type, reg->type); @@ -143,11 +166,6 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len); priv->meta = meta; - if (meta->insn.src_reg == BPF_REG_10 || - meta->insn.dst_reg == BPF_REG_10) { - pr_err("stack not yet supported\n"); - return -EINVAL; - } if (meta->insn.src_reg >= MAX_BPF_REG || meta->insn.dst_reg >= MAX_BPF_REG) { pr_err("program uses extended registers - jit hardening?\n"); @@ -176,6 +194,8 @@ int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog) struct nfp_bpf_analyzer_priv *priv; int ret; + nfp_prog->stack_depth = prog->aux->stack_depth; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; From a82b23fb38eaaaad89332b90029fc4cd7c3f2545 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:09 -0700 Subject: [PATCH 4/9] nfp: bpf: add stack read support Add simple stack read support, similar to write in every aspect, but data flowing the other way. Note that unlike write which can be done in smaller than word quantities, if registers are loaded with less-than-word of stack contents - the values have to be zero extended. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 81 ++++++++++++++++++-- 1 file changed, 76 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index d2a3e9065dbe..094acea35326 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -644,11 +644,65 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, typedef int (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, - unsigned int size); + unsigned int size, bool new_gpr); + +static int +wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, + unsigned int size, bool new_gpr) +{ + u32 idx, src_byte; + enum shf_sc sc; + swreg reg; + int shf; + u8 mask; + + if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4)) + return -EOPNOTSUPP; + + idx = off / 4; + + /* Move the entire word */ + if (size == 4) { + wrp_mov(nfp_prog, reg_both(dst), reg_lm(0, idx)); + return 0; + } + + src_byte = off % 4; + + mask = (1 << size) - 1; + mask <<= dst_byte; + + if (WARN_ON_ONCE(mask > 0xf)) + return -EOPNOTSUPP; + + shf = abs(src_byte - dst_byte) * 8; + if (src_byte == dst_byte) { + sc = SHF_SC_NONE; + } else if (src_byte < dst_byte) { + shf = 32 - shf; + sc = SHF_SC_L_SHF; + } else { + sc = SHF_SC_R_SHF; + } + + /* ld_field can address fewer indexes, if offset too large do RMW. + * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. + */ + if (idx <= RE_REG_LM_IDX_MAX) { + reg = reg_lm(0, idx); + } else { + reg = imm_a(nfp_prog); + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + } + + emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr); + + return 0; +} static int wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, - unsigned int size) + unsigned int size, bool new_gpr) { u32 idx, dst_byte; enum shf_sc sc; @@ -705,12 +759,16 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, static int mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int size, u8 gpr, lmem_step step) + unsigned int size, u8 gpr, bool clr_gpr, lmem_step step) { s32 off = nfp_prog->stack_depth + meta->insn.off; + u8 prev_gpr = 255; u32 gpr_byte = 0; int ret; + if (clr_gpr && size < 8) + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + while (size) { u32 slice_end; u8 slice_size; @@ -719,10 +777,12 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, slice_end = min(off + slice_size, round_up(off + 1, 4)); slice_size = slice_end - off; - ret = step(nfp_prog, gpr, gpr_byte, off, slice_size); + ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, + gpr != prev_gpr); if (ret) return ret; + prev_gpr = gpr; gpr_byte += slice_size; if (gpr_byte >= 4) { gpr_byte -= 4; @@ -1232,6 +1292,14 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) meta->insn.src_reg * 2, 4); } +static int +mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + return mem_op_stack(nfp_prog, meta, size, meta->insn.dst_reg * 2, true, + wrp_lmem_load); +} + static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 size) { @@ -1315,6 +1383,9 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, if (meta->ptr.type == PTR_TO_PACKET) return mem_ldx_data(nfp_prog, meta, size); + if (meta->ptr.type == PTR_TO_STACK) + return mem_ldx_stack(nfp_prog, meta, size); + return -EOPNOTSUPP; } @@ -1396,7 +1467,7 @@ static int mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size) { - return mem_op_stack(nfp_prog, meta, size, meta->insn.src_reg * 2, + return mem_op_stack(nfp_prog, meta, size, meta->insn.src_reg * 2, false, wrp_lmem_store); } From 9a90c83c09874a2fd03905ef0f73512c9de18799 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:10 -0700 Subject: [PATCH 5/9] nfp: bpf: optimize the RMW for stack accesses When we are performing unaligned stack accesses in the 32-64B window we have to do a read-modify-write cycle. E.g. for reading 8 bytes from address 17: 0: tmp = stack[16] 1: gprLo = tmp >> 8 2: tmp = stack[20] 3: gprLo |= tmp << 24 4: tmp = stack[20] 5: gprHi = tmp >> 8 6: tmp = stack[24] 7: gprHi |= tmp << 24 The load on line 4 is unnecessary, because tmp already contains data from stack[20]. For write we can optimize both loads and writebacks away. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 33 +++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 094acea35326..6730690cf9d8 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -644,11 +644,11 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, typedef int (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, - unsigned int size, bool new_gpr); + unsigned int size, bool first, bool new_gpr, bool last); static int wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, - unsigned int size, bool new_gpr) + unsigned int size, bool first, bool new_gpr, bool last) { u32 idx, src_byte; enum shf_sc sc; @@ -692,7 +692,13 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, reg = reg_lm(0, idx); } else { reg = imm_a(nfp_prog); - wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + /* If it's not the first part of the load and we start a new GPR + * that means we are loading a second part of the LMEM word into + * a new GPR. IOW we've already looked that LMEM word and + * therefore it has been loaded into imm_a(). + */ + if (first || !new_gpr) + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); } emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr); @@ -702,7 +708,7 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, static int wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, - unsigned int size, bool new_gpr) + unsigned int size, bool first, bool new_gpr, bool last) { u32 idx, dst_byte; enum shf_sc sc; @@ -746,13 +752,19 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, reg = reg_lm(0, idx); } else { reg = imm_a(nfp_prog); - wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + /* Only first and last LMEM locations are going to need RMW, + * the middle location will be overwritten fully. + */ + if (first || last) + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); } emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf); - if (idx > RE_REG_LM_IDX_MAX) - wrp_mov(nfp_prog, reg_lm(0, idx), reg); + if (new_gpr || last) { + if (idx > RE_REG_LM_IDX_MAX) + wrp_mov(nfp_prog, reg_lm(0, idx), reg); + } return 0; } @@ -762,6 +774,7 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size, u8 gpr, bool clr_gpr, lmem_step step) { s32 off = nfp_prog->stack_depth + meta->insn.off; + bool first = true, last; u8 prev_gpr = 255; u32 gpr_byte = 0; int ret; @@ -777,12 +790,16 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, slice_end = min(off + slice_size, round_up(off + 1, 4)); slice_size = slice_end - off; + last = slice_size == size; + ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, - gpr != prev_gpr); + first, gpr != prev_gpr, last); if (ret) return ret; prev_gpr = gpr; + first = false; + gpr_byte += slice_size; if (gpr_byte >= 4) { gpr_byte -= 4; From d3488480635f453410fd27cea3fc370cedc7e28a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:11 -0700 Subject: [PATCH 6/9] nfp: bpf: allow stack accesses via modified stack registers As long as the verifier tells us the stack offset exactly we can render the LMEM reads quite easily. Simply make sure that the offset is constant for a given instruction and add it to the instruction's offset. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 23 ++++++++++-------- drivers/net/ethernet/netronome/nfp/bpf/main.h | 3 +++ .../net/ethernet/netronome/nfp/bpf/verifier.c | 24 +++++++++++++------ 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 6730690cf9d8..073e382cba04 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -771,9 +771,10 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, static int mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int size, u8 gpr, bool clr_gpr, lmem_step step) + unsigned int size, unsigned int ptr_off, u8 gpr, bool clr_gpr, + lmem_step step) { - s32 off = nfp_prog->stack_depth + meta->insn.off; + s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; bool first = true, last; u8 prev_gpr = 255; u32 gpr_byte = 0; @@ -1311,10 +1312,10 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int size) + unsigned int size, unsigned int ptr_off) { - return mem_op_stack(nfp_prog, meta, size, meta->insn.dst_reg * 2, true, - wrp_lmem_load); + return mem_op_stack(nfp_prog, meta, size, ptr_off, + meta->insn.dst_reg * 2, true, wrp_lmem_load); } static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, @@ -1401,7 +1402,8 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, return mem_ldx_data(nfp_prog, meta, size); if (meta->ptr.type == PTR_TO_STACK) - return mem_ldx_stack(nfp_prog, meta, size); + return mem_ldx_stack(nfp_prog, meta, size, + meta->ptr.off + meta->ptr.var_off.value); return -EOPNOTSUPP; } @@ -1482,10 +1484,10 @@ mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, static int mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int size) + unsigned int size, unsigned int ptr_off) { - return mem_op_stack(nfp_prog, meta, size, meta->insn.src_reg * 2, false, - wrp_lmem_store); + return mem_op_stack(nfp_prog, meta, size, ptr_off, + meta->insn.src_reg * 2, false, wrp_lmem_store); } static int @@ -1496,7 +1498,8 @@ mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, return mem_stx_data(nfp_prog, meta, size); if (meta->ptr.type == PTR_TO_STACK) - return mem_stx_stack(nfp_prog, meta, size); + return mem_stx_stack(nfp_prog, meta, size, + meta->ptr.off + meta->ptr.var_off.value); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index a31632681e79..d4f144a62f0f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -56,6 +56,7 @@ enum br_special { enum static_regs { STATIC_REG_IMM = 21, /* Bank AB */ + STATIC_REG_STACK = 22, /* Bank A */ STATIC_REG_PKT_LEN = 22, /* Bank B */ }; @@ -74,6 +75,8 @@ enum nfp_bpf_action_type { #define pv_len(np) reg_lm(1, PKT_VEC_PKT_LEN) #define pv_ctm_ptr(np) reg_lm(1, PKT_VEC_PKT_PTR) +#define stack_reg(np) reg_a(STATIC_REG_STACK) +#define stack_imm(np) imm_b(np) #define plen_reg(np) reg_b(STATIC_REG_PKT_LEN) #define pptr_reg(np) pv_ctm_ptr(np) #define imm_a(np) reg_a(STATIC_REG_IMM) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 376d9938b823..633db3e1a11e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -111,19 +111,29 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, return 0; } -static int nfp_bpf_check_stack_access(const struct bpf_reg_state *reg) +static int +nfp_bpf_check_stack_access(struct nfp_insn_meta *meta, + const struct bpf_reg_state *reg) { + s32 old_off, new_off; + if (!tnum_is_const(reg->var_off)) { pr_info("variable ptr stack access\n"); return -EINVAL; } - if (reg->var_off.value || reg->off) { - pr_info("stack access via modified register\n"); - return -EINVAL; - } + if (meta->ptr.type == NOT_INIT) + return 0; - return 0; + old_off = meta->ptr.off + meta->ptr.var_off.value; + new_off = reg->off + reg->var_off.value; + + if (old_off == new_off) + return 0; + + pr_info("stack access changed location was:%d is:%d\n", + old_off, new_off); + return -EINVAL; } static int @@ -141,7 +151,7 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, } if (reg->type == PTR_TO_STACK) { - err = nfp_bpf_check_stack_access(reg); + err = nfp_bpf_check_stack_access(meta, reg); if (err) return err; } From 2df03a50f14ab6d888c212aa332536933ded040a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:12 -0700 Subject: [PATCH 7/9] nfp: bpf: support accessing the stack beyond 64 bytes To access beyond 64th byte of the stack we need to set a new stack pointer register (LMEM is accessed indirectly through those pointers). Add a function for encoding local CSR access instruction. Use stack pointer number 3. Note that stack pointer registers allow us to index into 32 bytes of LMEM (with shift operations i.e. when operands are restricted). This means if access is crossing 32 byte boundary we must not use offsetting, we have to set the pointer to the exact address and move it with post-increments. We depend on the datapath placing the stack base address in GPR A22 for our use. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 114 ++++++++++++++++-- .../net/ethernet/netronome/nfp/bpf/offload.c | 6 - drivers/net/ethernet/netronome/nfp/nfp_asm.h | 5 + 3 files changed, 111 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 073e382cba04..5105b9247839 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -427,6 +427,48 @@ emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false); } +static void +__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_LCSR_BASE | + FIELD_PREP(OP_LCSR_A_SRC, areg) | + FIELD_PREP(OP_LCSR_B_SRC, breg) | + FIELD_PREP(OP_LCSR_WRITE, wr) | + FIELD_PREP(OP_LCSR_ADDR, addr) | + FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr) +{ + struct nfp_insn_ur_regs reg; + int err; + + /* This instruction takes immeds instead of reg_none() for the ignored + * operand, but we can't encode 2 immeds in one instr with our normal + * swreg infra so if param is an immed, we encode as reg_none() and + * copy the immed to both operands. + */ + if (swreg_type(src) == NN_REG_IMM) { + err = swreg_to_unrestricted(reg_none(), src, reg_none(), ®); + reg.breg = reg.areg; + } else { + err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), ®); + } + if (err) { + nfp_prog->error = err; + return; + } + + __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4, + false, reg.src_lmextn); +} + static void emit_nop(struct nfp_prog *nfp_prog) { __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0); @@ -644,12 +686,15 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, typedef int (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, - unsigned int size, bool first, bool new_gpr, bool last); + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc); static int wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, - unsigned int size, bool first, bool new_gpr, bool last) + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc) { + bool should_inc = needs_inc && new_gpr && !last; u32 idx, src_byte; enum shf_sc sc; swreg reg; @@ -663,10 +708,14 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, /* Move the entire word */ if (size == 4) { - wrp_mov(nfp_prog, reg_both(dst), reg_lm(0, idx)); + wrp_mov(nfp_prog, reg_both(dst), + should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx)); return 0; } + if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) + return -EOPNOTSUPP; + src_byte = off % 4; mask = (1 << size) - 1; @@ -689,7 +738,7 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. */ if (idx <= RE_REG_LM_IDX_MAX) { - reg = reg_lm(0, idx); + reg = reg_lm(lm3 ? 3 : 0, idx); } else { reg = imm_a(nfp_prog); /* If it's not the first part of the load and we start a new GPR @@ -703,13 +752,18 @@ wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr); + if (should_inc) + wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); + return 0; } static int wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, - unsigned int size, bool first, bool new_gpr, bool last) + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc) { + bool should_inc = needs_inc && new_gpr && !last; u32 idx, dst_byte; enum shf_sc sc; swreg reg; @@ -723,10 +777,15 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, /* Move the entire word */ if (size == 4) { - wrp_mov(nfp_prog, reg_lm(0, idx), reg_b(src)); + wrp_mov(nfp_prog, + should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx), + reg_b(src)); return 0; } + if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) + return -EOPNOTSUPP; + dst_byte = off % 4; mask = (1 << size) - 1; @@ -749,7 +808,7 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. */ if (idx <= RE_REG_LM_IDX_MAX) { - reg = reg_lm(0, idx); + reg = reg_lm(lm3 ? 3 : 0, idx); } else { reg = imm_a(nfp_prog); /* Only first and last LMEM locations are going to need RMW, @@ -764,6 +823,8 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, if (new_gpr || last) { if (idx > RE_REG_LM_IDX_MAX) wrp_mov(nfp_prog, reg_lm(0, idx), reg); + if (should_inc) + wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); } return 0; @@ -776,10 +837,44 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, { s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; bool first = true, last; + bool needs_inc = false; + swreg stack_off_reg; u8 prev_gpr = 255; u32 gpr_byte = 0; + bool lm3 = true; int ret; + if (off + size <= 64) { + /* We can reach bottom 64B with LMaddr0 */ + lm3 = false; + } else if (round_down(off, 32) == round_down(off + size - 1, 32)) { + /* We have to set up a new pointer. If we know the offset + * and the entire access falls into a single 32 byte aligned + * window we won't have to increment the LM pointer. + * The 32 byte alignment is imporant because offset is ORed in + * not added when doing *l$indexN[off]. + */ + stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32), + stack_imm(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); + + off %= 32; + } else { + stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4), + stack_imm(nfp_prog)); + + emit_alu(nfp_prog, imm_b(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); + + needs_inc = true; + } + if (lm3) { + emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3); + /* For size < 4 one slot will be filled by zeroing of upper. */ + wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3); + } + if (clr_gpr && size < 8) wrp_immed(nfp_prog, reg_both(gpr + 1), 0); @@ -793,8 +888,11 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, last = slice_size == size; + if (needs_inc) + off %= 4; + ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, - first, gpr != prev_gpr, last); + first, gpr != prev_gpr, last, lm3, needs_inc); if (ret) return ret; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index f215abcbc18e..fbca1ca1f39b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -168,12 +168,6 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); - if (cls_bpf->prog->aux->stack_depth > 64) { - nn_info(nn, "large stack not supported: program %dB > 64B\n", - cls_bpf->prog->aux->stack_depth); - return -EOPNOTSUPP; - } - stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; if (cls_bpf->prog->aux->stack_depth > stack_size) { nn_info(nn, "stack too large: program %dB > FW stack %dB\n", diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index 86e7daee6099..f4d1df3a1925 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -257,6 +257,11 @@ enum lcsr_wr_src { #define OP_CARB_BASE 0x0e000000000ULL #define OP_CARB_OR 0x00000010000ULL +#define NFP_CSR_ACT_LM_ADDR0 0x64 +#define NFP_CSR_ACT_LM_ADDR1 0x6c +#define NFP_CSR_ACT_LM_ADDR2 0x94 +#define NFP_CSR_ACT_LM_ADDR3 0x9c + /* Software register representation, independent of operand type */ #define NN_REG_TYPE GENMASK(31, 24) #define NN_REG_LM_IDX GENMASK(23, 22) From b14157eeed4eff2b293e0ca7738f6a3dbfff51cc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:13 -0700 Subject: [PATCH 8/9] nfp: bpf: support stack accesses via non-constant pointers If stack pointer has a different value on different paths but the alignment to words (4B) remains the same, we can set a new LMEM access pointer to the calculated value and access whichever word it's pointing to. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 41 +++++++++++++++---- drivers/net/ethernet/netronome/nfp/bpf/main.h | 2 + .../net/ethernet/netronome/nfp/bpf/verifier.c | 12 ++++-- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 5105b9247839..d84f00b80aac 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -832,8 +832,8 @@ wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, static int mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int size, unsigned int ptr_off, u8 gpr, bool clr_gpr, - lmem_step step) + unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr, + bool clr_gpr, lmem_step step) { s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; bool first = true, last; @@ -844,7 +844,19 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool lm3 = true; int ret; - if (off + size <= 64) { + if (meta->ptr_not_const) { + /* Use of the last encountered ptr_off is OK, they all have + * the same alignment. Depend on low bits of value being + * discarded when written to LMaddr register. + */ + stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off, + stack_imm(nfp_prog)); + + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg); + + needs_inc = true; + } else if (off + size <= 64) { /* We can reach bottom 64B with LMaddr0 */ lm3 = false; } else if (round_down(off, 32) == round_down(off + size - 1, 32)) { @@ -1096,9 +1108,22 @@ static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out) static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + u8 src = insn->src_reg * 2; - wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); - wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1); + if (insn->src_reg == BPF_REG_10) { + swreg stack_depth_reg; + + stack_depth_reg = ur_load_imm_any(nfp_prog, + nfp_prog->stack_depth, + stack_imm(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), + stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } else { + wrp_reg_mov(nfp_prog, dst, src); + wrp_reg_mov(nfp_prog, dst + 1, src + 1); + } return 0; } @@ -1413,7 +1438,8 @@ mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size, unsigned int ptr_off) { return mem_op_stack(nfp_prog, meta, size, ptr_off, - meta->insn.dst_reg * 2, true, wrp_lmem_load); + meta->insn.dst_reg * 2, meta->insn.src_reg * 2, + true, wrp_lmem_load); } static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, @@ -1585,7 +1611,8 @@ mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size, unsigned int ptr_off) { return mem_op_stack(nfp_prog, meta, size, ptr_off, - meta->insn.src_reg * 2, false, wrp_lmem_store); + meta->insn.src_reg * 2, meta->insn.dst_reg * 2, + false, wrp_lmem_store); } static int diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index d4f144a62f0f..86edc0691a5f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -101,6 +101,7 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); * struct nfp_insn_meta - BPF instruction wrapper * @insn: BPF instruction * @ptr: pointer type for memory operations + * @ptr_not_const: pointer is not always constant * @off: index of first generated machine instruction (in nfp_prog.prog) * @n: eBPF instruction number * @skip: skip this instruction (optimized out) @@ -110,6 +111,7 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); struct nfp_insn_meta { struct bpf_insn insn; struct bpf_reg_state ptr; + bool ptr_not_const; unsigned int off; unsigned short n; bool skip; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 633db3e1a11e..3d3dcac1c942 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -112,7 +112,8 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, } static int -nfp_bpf_check_stack_access(struct nfp_insn_meta *meta, +nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, const struct bpf_reg_state *reg) { s32 old_off, new_off; @@ -128,7 +129,12 @@ nfp_bpf_check_stack_access(struct nfp_insn_meta *meta, old_off = meta->ptr.off + meta->ptr.var_off.value; new_off = reg->off + reg->var_off.value; - if (old_off == new_off) + meta->ptr_not_const |= old_off != new_off; + + if (!meta->ptr_not_const) + return 0; + + if (old_off % 4 == new_off % 4) return 0; pr_info("stack access changed location was:%d is:%d\n", @@ -151,7 +157,7 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, } if (reg->type == PTR_TO_STACK) { - err = nfp_bpf_check_stack_access(meta, reg); + err = nfp_bpf_check_stack_access(nfp_prog, meta, reg); if (err) return err; } From 9f16c8abcd79fc31a74d3af64f085a009c9d4b5a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Oct 2017 11:58:14 -0700 Subject: [PATCH 9/9] nfp: bpf: optimize mov64 a little Loading 64bit constants require up to 4 load immediates, since we can only load 16 bits at a time. If the 32bit halves of the 64bit constant are the same, however, we can save a cycle by doing a register move instead of two loads of 16 bits. Note that we don't optimize the normal ALU64 load because even though it's a 64 bit load the upper half of the register is a coming from sign extension so we can load it in one cycle anyway. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 21 ++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index d84f00b80aac..e7eeb7a07f81 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1384,19 +1384,28 @@ static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1), - meta->insn.imm); + struct nfp_insn_meta *prev = nfp_meta_prev(meta); + u32 imm_lo, imm_hi; + u8 dst; + + dst = prev->insn.dst_reg * 2; + imm_lo = prev->insn.imm; + imm_hi = meta->insn.imm; + + wrp_immed(nfp_prog, reg_both(dst), imm_lo); + + /* mov is always 1 insn, load imm may be two, so try to use mov */ + if (imm_hi == imm_lo) + wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst)); + else + wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi); return 0; } static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - const struct bpf_insn *insn = &meta->insn; - meta->double_cb = imm_ld8_part2; - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); - return 0; }