Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r-- | kernel/bpf/verifier.c | 876
1 file changed, 410 insertions(+), 466 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c4f69a9e9af6..73bba397672a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -674,6 +674,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
 		return BPF_DYNPTR_TYPE_SKB;
 	case DYNPTR_TYPE_XDP:
 		return BPF_DYNPTR_TYPE_XDP;
+	case DYNPTR_TYPE_SKB_META:
+		return BPF_DYNPTR_TYPE_SKB_META;
 	default:
 		return BPF_DYNPTR_TYPE_INVALID;
 	}
@@ -690,6 +692,8 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
 		return DYNPTR_TYPE_SKB;
 	case BPF_DYNPTR_TYPE_XDP:
 		return DYNPTR_TYPE_XDP;
+	case BPF_DYNPTR_TYPE_SKB_META:
+		return DYNPTR_TYPE_SKB_META;
 	default:
 		return 0;
 	}
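
The two hunks above only teach the arg-type/type-flag converters about the new metadata dynptr. For context, a sketch of how a TC program could use it, assuming bpf_dynptr_from_skb_meta() mirrors the bpf_dynptr_from_skb() kfunc signature; the program, its section, and the 4-byte metadata layout are illustrative, not taken from this patch:

/* Assumed kfunc prototype, mirroring bpf_dynptr_from_skb(). */
extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, u64 flags,
				    struct bpf_dynptr *ptr__uninit) __ksym;

SEC("tc")
int read_xdp_meta(struct __sk_buff *skb)
{
	struct bpf_dynptr meta;
	__u32 mark;

	if (bpf_dynptr_from_skb_meta(skb, 0, &meta))
		return TC_ACT_OK;
	/* read 4 bytes an XDP program left in the metadata area */
	if (bpf_dynptr_read(&mark, sizeof(mark), &meta, 0, 0) == 0)
		skb->mark = mark;
	return TC_ACT_OK;
}
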
@@ -783,8 +787,7 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
 		state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
 	}
 
-	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
-	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+	bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
 
 	return 0;
 }
@@ -801,29 +804,7 @@ static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_stat
 	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
 	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
 
-	/* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
-	 *
-	 * While we don't allow reading STACK_INVALID, it is still possible to
-	 * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
-	 * helpers or insns can do partial read of that part without failing,
-	 * but check_stack_range_initialized, check_stack_read_var_off, and
-	 * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
-	 * the slot conservatively. Hence we need to prevent those liveness
-	 * marking walks.
-	 *
-	 * This was not a problem before because STACK_INVALID is only set by
-	 * default (where the default reg state has its reg->parent as NULL), or
-	 * in clean_live_states after REG_LIVE_DONE (at which point
-	 * mark_reg_read won't walk reg->parent chain), but not randomly during
-	 * verifier state exploration (like we did above). Hence, for our case
-	 * parentage chain will still be live (i.e. reg->parent may be
-	 * non-NULL), while earlier reg->parent was NULL, so we need
-	 * REG_LIVE_WRITTEN to screen off read marker propagation when it is
-	 * done later on reads or by mark_dynptr_read as well to unnecessary
-	 * mark registers in verifier state.
-	 */
-	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
-	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+	bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
 }
 
 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
@@ -932,9 +913,7 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
 	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
 	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
 
-	/* Same reason as unmark_stack_slots_dynptr above */
-	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
-	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+	bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
 
 	return 0;
 }
@@ -1052,7 +1031,6 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
 			else
 				st->type |= PTR_UNTRUSTED;
 		}
-		st->live |= REG_LIVE_WRITTEN;
 		st->ref_obj_id = i == 0 ? id : 0;
 		st->iter.btf = btf;
 		st->iter.btf_id = btf_id;
@@ -1062,6 +1040,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
 		for (j = 0; j < BPF_REG_SIZE; j++)
 			slot->slot_type[j] = STACK_ITER;
 
+		bpf_mark_stack_write(env, state->frameno, BIT(spi - i));
 		mark_stack_slot_scratched(env, spi - i);
 	}
 
@@ -1087,12 +1066,10 @@ static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
 
 		__mark_reg_not_init(env, st);
 
-		/* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
-		st->live |= REG_LIVE_WRITTEN;
-
 		for (j = 0; j < BPF_REG_SIZE; j++)
 			slot->slot_type[j] = STACK_INVALID;
 
+		bpf_mark_stack_write(env, state->frameno, BIT(spi - i));
 		mark_stack_slot_scratched(env, spi - i);
 	}
 
@@ -1182,9 +1159,9 @@ static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
 	slot = &state->stack[spi];
 	st = &slot->spilled_ptr;
 
+	bpf_mark_stack_write(env, reg->frameno, BIT(spi));
 	__mark_reg_known_zero(st);
 	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
-	st->live |= REG_LIVE_WRITTEN;
 	st->ref_obj_id = id;
 	st->irq.kfunc_class = kfunc_class;
 
@@ -1238,8 +1215,7 @@ static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_r
 
 	__mark_reg_not_init(env, st);
 
-	/* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
-	st->live |= REG_LIVE_WRITTEN;
+	bpf_mark_stack_write(env, reg->frameno, BIT(spi));
 
 	for (i = 0; i < BPF_REG_SIZE; i++)
 		slot->slot_type[i] = STACK_INVALID;
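
Throughout the hunks above, per-slot write marks now go through bpf_mark_stack_write(), which takes a bitmask of 8-byte stack slot indices instead of setting REG_LIVE_WRITTEN on individual bpf_reg_state entries. The slot-index arithmetic, for reference (spi_of() is a stand-in for the verifier's internal __get_spi()):

/* Stack offsets are negative and counted in 8-byte slots below fp:
 * off -8 -> spi 0, off -16 -> spi 1, and so on.
 */
static int spi_of(int off)
{
	return (-off - 1) / 8;	/* BPF_REG_SIZE == 8 */
}

/* A 16-byte dynptr stored at fp-16 has spi == 1 and occupies slots 1
 * and 0, so the mask passed above is BIT(spi - 1) | BIT(spi) == 0x3.
 */
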
@@ -1754,6 +1730,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 			return err;
 	dst_state->speculative = src->speculative;
 	dst_state->in_sleepable = src->in_sleepable;
+	dst_state->cleaned = src->cleaned;
 	dst_state->curframe = src->curframe;
 	dst_state->branches = src->branches;
 	dst_state->parent = src->parent;
@@ -1946,9 +1923,24 @@ static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_stat
 		return 0;
 	visit = scc_visit_lookup(env, callchain);
 	if (!visit) {
-		verifier_bug(env, "scc exit: no visit info for call chain %s",
-			     format_callchain(env, callchain));
-		return -EFAULT;
+		/*
+		 * If path traversal stops inside an SCC, corresponding bpf_scc_visit
+		 * must exist for non-speculative paths. For non-speculative paths
+		 * traversal stops when:
+		 * a. Verification error is found, maybe_exit_scc() is not called.
+		 * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member
+		 *    of any SCC.
+		 * c. A checkpoint is reached and matched. Checkpoints are created by
+		 *    is_state_visited(), which calls maybe_enter_scc(), which allocates
+		 *    bpf_scc_visit instances for checkpoints within SCCs.
+		 * (c) is the only case that can reach this point.
+		 */
+		if (!st->speculative) {
+			verifier_bug(env, "scc exit: no visit info for call chain %s",
+				     format_callchain(env, callchain));
+			return -EFAULT;
+		}
+		return 0;
 	}
 	if (visit->entry_state != st)
 		return 0;
@@ -2017,7 +2009,7 @@ static void free_backedges(struct bpf_scc_visit *visit)
 	for (backedge = visit->backedges; backedge; backedge = next) {
 		free_verifier_state(&backedge->state, false);
 		next = backedge->next;
-		kvfree(backedge);
+		kfree(backedge);
 	}
 	visit->backedges = NULL;
 }
@@ -2232,10 +2224,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
 		/* transfer reg's id which is unique for every map_lookup_elem
 		 * as UID of the inner map.
 		 */
-		if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
-			reg->map_uid = reg->id;
-		if (btf_record_has_field(map->inner_map_meta->record, BPF_WORKQUEUE))
+		if (btf_record_has_field(map->inner_map_meta->record,
+					 BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
 			reg->map_uid = reg->id;
+		}
 	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
 		reg->type = PTR_TO_XDP_SOCK;
 	} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
@@ -2274,7 +2266,8 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
 {
 	return base_type(reg->type) == PTR_TO_MEM &&
-	       (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
+	       (reg->type &
+		(DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META));
 }
 
 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
@@ -2873,8 +2866,6 @@ static void init_reg_state(struct bpf_verifier_env *env,
 
 	for (i = 0; i < MAX_BPF_REG; i++) {
 		mark_reg_not_init(env, regs, i);
-		regs[i].live = REG_LIVE_NONE;
-		regs[i].parent = NULL;
 		regs[i].subreg_def = DEF_NOT_SUBREG;
 	}
 
@@ -2958,7 +2949,7 @@ static int cmp_subprogs(const void *a, const void *b)
 }
 
 /* Find subprogram that contains instruction at 'off' */
-static struct bpf_subprog_info *find_containing_subprog(struct bpf_verifier_env *env, int off)
+struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off)
 {
 	struct bpf_subprog_info *vals = env->subprog_info;
 	int l, r, m;
@@ -2983,7 +2974,7 @@ static int find_subprog(struct bpf_verifier_env *env, int off)
 {
 	struct bpf_subprog_info *p;
 
-	p = find_containing_subprog(env, off);
+	p = bpf_find_containing_subprog(env, off);
 	if (!p || p->start != off)
 		return -ENOENT;
 	return p - env->subprog_info;
@@ -3494,15 +3485,6 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 	return 0;
 }
 
-static int jmp_offset(struct bpf_insn *insn)
-{
-	u8 code = insn->code;
-
-	if (code == (BPF_JMP32 | BPF_JA))
-		return insn->imm;
-	return insn->off;
-}
-
 static int check_subprogs(struct bpf_verifier_env *env)
 {
 	int i, subprog_start, subprog_end, off, cur_subprog = 0;
@@ -3529,7 +3511,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
 			goto next;
 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
 			goto next;
-		off = i + jmp_offset(&insn[i]) + 1;
+		off = i + bpf_jmp_offset(&insn[i]) + 1;
 		if (off < subprog_start || off >= subprog_end) {
 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
 			return -EINVAL;
@@ -3555,69 +3537,15 @@ next:
 	return 0;
 }
 
-/* Parentage chain of this register (or stack slot) should take care of all
- * issues like callee-saved registers, stack slot allocation time, etc.
- */
-static int mark_reg_read(struct bpf_verifier_env *env,
-			 const struct bpf_reg_state *state,
-			 struct bpf_reg_state *parent, u8 flag)
-{
-	bool writes = parent == state->parent; /* Observe write marks */
-	int cnt = 0;
-
-	while (parent) {
-		/* if read wasn't screened by an earlier write ... */
-		if (writes && state->live & REG_LIVE_WRITTEN)
-			break;
-		if (verifier_bug_if(parent->live & REG_LIVE_DONE, env,
-				    "type %s var_off %lld off %d",
-				    reg_type_str(env, parent->type),
-				    parent->var_off.value, parent->off))
-			return -EFAULT;
-		/* The first condition is more likely to be true than the
-		 * second, checked it first.
-		 */
-		if ((parent->live & REG_LIVE_READ) == flag ||
-		    parent->live & REG_LIVE_READ64)
-			/* The parentage chain never changes and
-			 * this parent was already marked as LIVE_READ.
-			 * There is no need to keep walking the chain again and
-			 * keep re-marking all parents as LIVE_READ.
-			 * This case happens when the same register is read
-			 * multiple times without writes into it in-between.
-			 * Also, if parent has the stronger REG_LIVE_READ64 set,
-			 * then no need to set the weak REG_LIVE_READ32.
-			 */
-			break;
-		/* ... then we depend on parent's value */
-		parent->live |= flag;
-		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
-		if (flag == REG_LIVE_READ64)
-			parent->live &= ~REG_LIVE_READ32;
-		state = parent;
-		parent = state->parent;
-		writes = true;
-		cnt++;
-	}
-
-	if (env->longest_mark_read_walk < cnt)
-		env->longest_mark_read_walk = cnt;
-	return 0;
-}
-
 static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 				    int spi, int nr_slots)
 {
-	struct bpf_func_state *state = func(env, reg);
 	int err, i;
 
 	for (i = 0; i < nr_slots; i++) {
-		struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;
-
-		err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64);
+		err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi - i));
 		if (err)
 			return err;
-
 		mark_stack_slot_scratched(env, spi - i);
 	}
 
 	return 0;
@@ -3663,7 +3591,7 @@ static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state
  * code only. It returns TRUE if the source or destination register operates
  * on 64-bit, otherwise return FALSE.
 */
-static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
+static bool is_reg64(struct bpf_insn *insn,
 		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
 {
 	u8 code, class, op;
@@ -3774,14 +3702,14 @@ static int insn_def_regno(const struct bpf_insn *insn)
 }
 
 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
-static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static bool insn_has_def32(struct bpf_insn *insn)
 {
 	int dst_reg = insn_def_regno(insn);
 
 	if (dst_reg == -1)
 		return false;
 
-	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
+	return !is_reg64(insn, dst_reg, NULL, DST_OP);
 }
 
 static void mark_insn_zext(struct bpf_verifier_env *env,
@@ -3812,7 +3740,7 @@ static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *r
 		mark_reg_scratched(env, regno);
 
 	reg = &regs[regno];
-	rw64 = is_reg64(env, insn, regno, reg, t);
+	rw64 = is_reg64(insn, regno, reg, t);
 	if (t == SRC_OP) {
 		/* check whether register used as source operand can be read */
 		if (reg->type == NOT_INIT) {
@@ -3826,15 +3754,13 @@ static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *r
 		if (rw64)
 			mark_insn_zext(env, reg);
 
-		return mark_reg_read(env, reg, reg->parent,
-				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
+		return 0;
 	} else {
 		/* check whether register used as dest operand can be written to */
 		if (regno == BPF_REG_FP) {
 			verbose(env, "frame pointer is read only\n");
			return -EACCES;
 		}
-		reg->live |= REG_LIVE_WRITTEN;
 		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
 		if (t == DST_OP)
 			mark_reg_unknown(env, regs, regno);
@@ -4195,7 +4121,7 @@ static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
 	}
 }
 /* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
-static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
+void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
 {
 	DECLARE_BITMAP(mask, 64);
 	bool first = true;
@@ -4250,8 +4176,6 @@ static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_histo
 	}
 }
 
-static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
-
 /* For given verifier state backtrack_insn() is called from the last insn to
 * the first insn. Its purpose is to compute a bitmask of registers and
 * stack slots that needs precision in the parent verifier state.
@@ -4278,7 +4202,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
 		fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
 		verbose(env, "mark_precise: frame%d: regs=%s ",
 			bt->frame, env->tmp_str_buf);
-		fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
+		bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
 		verbose(env, "stack=%s before ", env->tmp_str_buf);
 		verbose(env, "%d: ", idx);
 		verbose_insn(env, insn);
@@ -4479,7 +4403,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
 			 * backtracking, as these registers are set by the function
 			 * invoking callback.
 			 */
-			if (subseq_idx >= 0 && calls_callback(env, subseq_idx))
+			if (subseq_idx >= 0 && bpf_calls_callback(env, subseq_idx))
 				for (i = BPF_REG_1; i <= BPF_REG_5; i++)
 					bt_clear_reg(bt, i);
 			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
@@ -4918,7 +4842,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env,
 				bt_frame_reg_mask(bt, fr));
 			verbose(env, "mark_precise: frame%d: parent state regs=%s ",
 				fr, env->tmp_str_buf);
-			fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
+			bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
 				       bt_frame_stack_mask(bt, fr));
 			verbose(env, "stack=%s: ", env->tmp_str_buf);
 			print_verifier_state(env, st, fr, true);
@@ -5041,12 +4965,7 @@ static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
 /* Copy src state preserving dst->parent and dst->live fields */
 static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
 {
-	struct bpf_reg_state *parent = dst->parent;
-	enum bpf_reg_liveness live = dst->live;
-
 	*dst = *src;
-	dst->parent = parent;
-	dst->live = live;
 }
 
 static void save_register_state(struct bpf_verifier_env *env,
@@ -5057,8 +4976,6 @@ static void save_register_state(struct bpf_verifier_env *env,
 	int i;
 
 	copy_register_state(&state->stack[spi].spilled_ptr, reg);
-	if (size == BPF_REG_SIZE)
-		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 
 	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
 		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
@@ -5152,6 +5069,18 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 	if (err)
 		return err;
 
+	if (!(off % BPF_REG_SIZE) && size == BPF_REG_SIZE) {
+		/* only mark the slot as written if all 8 bytes were written
+		 * otherwise read propagation may incorrectly stop too soon
+		 * when stack slots are partially written.
+		 * This heuristic means that read propagation will be
+		 * conservative, since it will add reg_live_read marks
+		 * to stack slots all the way to first state when programs
+		 * writes+reads less than 8 bytes
+		 */
+		bpf_mark_stack_write(env, state->frameno, BIT(spi));
+	}
+
 	check_fastcall_stack_contract(env, state, insn_idx, off);
 	mark_stack_slot_scratched(env, spi);
 	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
@@ -5195,17 +5124,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 		for (i = 0; i < BPF_REG_SIZE; i++)
 			scrub_spilled_slot(&state->stack[spi].slot_type[i]);
 
-		/* only mark the slot as written if all 8 bytes were written
-		 * otherwise read propagation may incorrectly stop too soon
-		 * when stack slots are partially written.
-		 * This heuristic means that read propagation will be
-		 * conservative, since it will add reg_live_read marks
-		 * to stack slots all the way to first state when programs
-		 * writes+reads less than 8 bytes
-		 */
-		if (size == BPF_REG_SIZE)
-			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
-
 		/* when we zero initialize stack slots mark them as such */
 		if ((reg && register_is_null(reg)) ||
 		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
@@ -5398,7 +5316,6 @@ static void mark_reg_stack_read(struct bpf_verifier_env *env,
 		/* have read misc data from the stack */
 		mark_reg_unknown(env, state->regs, dst_regno);
 	}
-	state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
 }
 
 /* Read the stack at 'off' and put the results into the register indicated by
@@ -5421,12 +5338,16 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 	struct bpf_reg_state *reg;
 	u8 *stype, type;
 	int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);
+	int err;
 
 	stype = reg_state->stack[spi].slot_type;
 	reg = &reg_state->stack[spi].spilled_ptr;
 
 	mark_stack_slot_scratched(env, spi);
 	check_fastcall_stack_contract(env, state, env->insn_idx, off);
+	err = bpf_mark_stack_read(env, reg_state->frameno, env->insn_idx, BIT(spi));
+	if (err)
+		return err;
 
 	if (is_spilled_reg(&reg_state->stack[spi])) {
 		u8 spill_size = 1;
@@ -5441,7 +5362,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 			return -EACCES;
 		}
 
-		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 		if (dst_regno < 0)
 			return 0;
 
@@ -5495,7 +5415,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 				insn_flags = 0; /* not restoring original register state */
 			}
 		}
-		state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
 	} else if (dst_regno >= 0) {
 		/* restore register state from stack */
 		copy_register_state(&state->regs[dst_regno], reg);
@@ -5503,7 +5422,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 		 * has its liveness marks cleared by is_state_visited()
 		 * which resets stack/reg liveness for state transitions
 		 */
-		state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
 	} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
 		/* If dst_regno==-1, the caller is asking us whether
 		 * it is acceptable to use this value as a SCALAR_VALUE
@@ -5515,7 +5433,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 				off);
 			return -EACCES;
 		}
-		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 	} else {
 		for (i = 0; i < size; i++) {
 			type = stype[(slot - i) % BPF_REG_SIZE];
@@ -5529,7 +5446,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 				off, i, size);
 			return -EACCES;
 		}
-		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 		if (dst_regno >= 0)
 			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
 		insn_flags = 0; /* we are not restoring spilled register */
@@ -8157,10 +8073,10 @@ mark:
 		/* reading any byte out of 8-byte 'spill_slot' will cause
 		 * the whole slot to be marked as 'read'
 		 */
-		mark_reg_read(env, &state->stack[spi].spilled_ptr,
-			      state->stack[spi].spilled_ptr.parent,
-			      REG_LIVE_READ64);
-		/* We do not set REG_LIVE_WRITTEN for stack slot, as we can not
+		err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi));
+		if (err)
+			return err;
+		/* We do not call bpf_mark_stack_write(), as we can not
 		 * be sure that whether stack slot is written to or not. Hence,
 		 * we must still conservatively propagate reads upwards even if
 		 * helper may write to the entire memory range.
@@ -8515,38 +8431,70 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags)
 	return 0;
 }
 
-static int process_timer_func(struct bpf_verifier_env *env, int regno,
-			      struct bpf_call_arg_meta *meta)
+/* Check if @regno is a pointer to a specific field in a map value */
+static int check_map_field_pointer(struct bpf_verifier_env *env, u32 regno,
+				   enum btf_field_type field_type)
 {
 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 	bool is_const = tnum_is_const(reg->var_off);
 	struct bpf_map *map = reg->map_ptr;
 	u64 val = reg->var_off.value;
+	const char *struct_name = btf_field_type_name(field_type);
+	int field_off = -1;
 
 	if (!is_const) {
 		verbose(env,
-			"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
-			regno);
+			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
+			regno, struct_name);
 		return -EINVAL;
 	}
 	if (!map->btf) {
-		verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
-			map->name);
+		verbose(env, "map '%s' has to have BTF in order to use %s\n", map->name,
+			struct_name);
+		return -EINVAL;
+	}
+	if (!btf_record_has_field(map->record, field_type)) {
+		verbose(env, "map '%s' has no valid %s\n", map->name, struct_name);
 		return -EINVAL;
 	}
-	if (!btf_record_has_field(map->record, BPF_TIMER)) {
-		verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
+	switch (field_type) {
+	case BPF_TIMER:
+		field_off = map->record->timer_off;
+		break;
+	case BPF_TASK_WORK:
+		field_off = map->record->task_work_off;
+		break;
+	default:
+		verifier_bug(env, "unsupported BTF field type: %s\n", struct_name);
 		return -EINVAL;
 	}
-	if (map->record->timer_off != val + reg->off) {
-		verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
-			val + reg->off, map->record->timer_off);
+	if (field_off != val + reg->off) {
		verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
+			val + reg->off, struct_name, field_off);
 		return -EINVAL;
 	}
+	return 0;
+}
+
+static int process_timer_func(struct bpf_verifier_env *env, int regno,
+			      struct bpf_call_arg_meta *meta)
+{
+	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+	struct bpf_map *map = reg->map_ptr;
+	int err;
+
+	err = check_map_field_pointer(env, regno, BPF_TIMER);
+	if (err)
+		return err;
+
 	if (meta->map_ptr) {
 		verifier_bug(env, "Two map pointers in a timer helper");
 		return -EFAULT;
 	}
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n");
+		return -EOPNOTSUPP;
+	}
 	meta->map_uid = reg->map_uid;
 	meta->map_ptr = map;
 	return 0;
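
check_map_field_pointer() factors out the checks formerly open-coded for bpf_timer: constant register offset, map BTF present, the field recorded in map->record, and the pointer landing exactly on the recorded field offset. On the program side that corresponds to the usual layout (illustrative snippet using the long-standing bpf_timer API; names are made up):

struct elem {
	int data;
	struct bpf_timer t;	/* this offset is what map->record->timer_off records */
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct elem);
} timer_map SEC(".maps");

/* Passing &val->t to bpf_timer_init() satisfies the offset check; a
 * pointer anywhere else in the value triggers the "off %lld doesn't
 * point to 'struct bpf_timer'" error. Note the hunk above now also
 * rejects bpf_timer outright on PREEMPT_RT kernels.
 */
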
@@ -8569,6 +8517,26 @@ static int process_wq_func(struct bpf_verifier_env *env, int regno,
 	return 0;
 }
 
+static int process_task_work_func(struct bpf_verifier_env *env, int regno,
+				  struct bpf_kfunc_call_arg_meta *meta)
+{
+	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+	struct bpf_map *map = reg->map_ptr;
+	int err;
+
+	err = check_map_field_pointer(env, regno, BPF_TASK_WORK);
+	if (err)
+		return err;
+
+	if (meta->map.ptr) {
+		verifier_bug(env, "Two map pointers in a bpf_task_work helper");
+		return -EFAULT;
+	}
+	meta->map.uid = reg->map_uid;
+	meta->map.ptr = map;
+	return 0;
+}
+
 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
 			     struct bpf_call_arg_meta *meta)
 {
@@ -10398,6 +10366,8 @@ typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
 				   struct bpf_func_state *callee,
 				   int insn_idx);
 
+static bool is_task_work_add_kfunc(u32 func_id);
+
 static int set_callee_state(struct bpf_verifier_env *env,
 			    struct bpf_func_state *caller,
 			    struct bpf_func_state *callee, int insn_idx);
@@ -10616,7 +10586,8 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
 			env->subprog_info[subprog].is_async_cb = true;
 			async_cb = push_async_cb(env, env->subprog_info[subprog].start,
 						 insn_idx, subprog,
-						 is_bpf_wq_set_callback_impl_kfunc(insn->imm));
+						 is_bpf_wq_set_callback_impl_kfunc(insn->imm) ||
+						 is_task_work_add_kfunc(insn->imm));
 			if (!async_cb)
 				return -EFAULT;
 			callee = async_cb->frame[0];
@@ -10717,6 +10688,8 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	/* and go analyze first insn of the callee */
 	*insn_idx = env->subprog_info[subprog].start - 1;
 
+	bpf_reset_live_stack_callchain(env);
+
 	if (env->log.level & BPF_LOG_LEVEL) {
 		verbose(env, "caller:\n");
 		print_verifier_state(env, state, caller->frameno, true);
@@ -10842,7 +10815,7 @@ static int set_timer_callback_state(struct bpf_verifier_env *env,
 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 	callee->in_async_callback_fn = true;
-	callee->callback_ret_range = retval_range(0, 1);
+	callee->callback_ret_range = retval_range(0, 0);
 	return 0;
 }
 
@@ -10929,6 +10902,36 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
+						 struct bpf_func_state *caller,
+						 struct bpf_func_state *callee,
+						 int insn_idx)
+{
+	struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr;
+
+	/*
+	 * callback_fn(struct bpf_map *map, void *key, void *value);
+	 */
+	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
+	callee->regs[BPF_REG_1].map_ptr = map_ptr;
+
+	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
+	callee->regs[BPF_REG_2].map_ptr = map_ptr;
+
+	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
+	callee->regs[BPF_REG_3].map_ptr = map_ptr;
+
+	/* unused */
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+	callee->in_async_callback_fn = true;
+	callee->callback_ret_range = retval_range(S32_MIN, S32_MAX);
+	return 0;
+}
+
 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
 
 /* Are we currently verifying the callback for a rbtree helper that must
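
set_task_work_schedule_callback_state() types the callback arguments from the map passed in R3, so the BPF-side callback has exactly the shape given in the comment. A hedged sketch of the program side; the extern below is an assumption about the kfunc prototype, since only the callback signature and the R2/R3 argument roles are established by the hunks in this diff:

struct val {
	struct bpf_task_work tw;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 16);
	__type(key, int);
	__type(value, struct val);
} tw_map SEC(".maps");

/* Shape fixed by the verifier above; unlike timer callbacks
 * (retval_range(0, 0)), the allowed return range is [S32_MIN, S32_MAX].
 */
static int tw_callback(struct bpf_map *map, void *key, void *value)
{
	return 0;
}

/* Assumed prototype: task, bpf_task_work field (R2), owning map (R3),
 * callback, prog aux. Argument order inferred from the R2/R3 checks
 * in check_kfunc_args() below.
 */
extern int bpf_task_work_schedule_resume(struct task_struct *task,
					 struct bpf_task_work *tw,
					 void *map, void *callback,
					 void *aux__prog) __ksym;
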
@@ -10992,8 +10995,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 	}
 
 	/* we are going to rely on register's precise value */
-	err = mark_reg_read(env, r0, r0->parent, REG_LIVE_READ64);
-	err = err ?: mark_chain_precision(env, BPF_REG_0);
+	err = mark_chain_precision(env, BPF_REG_0);
 	if (err)
 		return err;
 
@@ -11003,7 +11005,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 				"At callback return", "R0");
 		return -EINVAL;
 	}
-	if (!calls_callback(env, callee->callsite)) {
+	if (!bpf_calls_callback(env, callee->callsite)) {
 		verifier_bug(env, "in callback at %d, callsite %d !calls_callback",
 			     *insn_idx, callee->callsite);
 		return -EFAULT;
@@ -11354,7 +11356,7 @@ static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
 		return -EINVAL;
 
 	*ptr = env->ops->get_func_proto(func_id, env->prog);
-	return *ptr ? 0 : -EINVAL;
+	return *ptr && (*ptr)->func ? 0 : -EINVAL;
 }
 
 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -11641,7 +11643,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
 			return -EFAULT;
 
-		if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
+		if (dynptr_type == BPF_DYNPTR_TYPE_SKB ||
+		    dynptr_type == BPF_DYNPTR_TYPE_SKB_META)
 			/* this will trigger clear_all_pkt_pointers(), which will
 			 * invalidate all dynptr slices associated with the skb
 			 */
@@ -11896,17 +11899,11 @@ static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_re
 
 	if (regno == BPF_REG_0) {
 		/* Function return value */
-		reg->live |= REG_LIVE_WRITTEN;
 		reg->subreg_def = reg_size == sizeof(u64) ?
 			DEF_NOT_SUBREG : env->insn_idx + 1;
-	} else {
+	} else if (reg_size == sizeof(u64)) {
 		/* Function argument */
-		if (reg_size == sizeof(u64)) {
-			mark_insn_zext(env, reg);
-			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
-		} else {
-			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
-		}
+		mark_insn_zext(env, reg);
 	}
 }
 
@@ -12059,6 +12056,7 @@ enum {
 	KF_ARG_RB_NODE_ID,
 	KF_ARG_WORKQUEUE_ID,
 	KF_ARG_RES_SPIN_LOCK_ID,
+	KF_ARG_TASK_WORK_ID,
 };
 
 BTF_ID_LIST(kf_arg_btf_ids)
@@ -12069,6 +12067,7 @@ BTF_ID(struct, bpf_rb_root)
 BTF_ID(struct, bpf_rb_node)
 BTF_ID(struct, bpf_wq)
 BTF_ID(struct, bpf_res_spin_lock)
+BTF_ID(struct, bpf_task_work)
 
 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
 				    const struct btf_param *arg, int type)
@@ -12117,6 +12116,11 @@ static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
 }
 
+static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg)
+{
+	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID);
+}
+
 static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg)
 {
 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
@@ -12204,6 +12208,7 @@ enum kfunc_ptr_arg_type {
 	KF_ARG_PTR_TO_WORKQUEUE,
 	KF_ARG_PTR_TO_IRQ_FLAG,
 	KF_ARG_PTR_TO_RES_SPIN_LOCK,
+	KF_ARG_PTR_TO_TASK_WORK,
 };
 
 enum special_kfunc_type {
@@ -12228,6 +12233,8 @@ enum special_kfunc_type {
 	KF_bpf_rbtree_right,
 	KF_bpf_dynptr_from_skb,
 	KF_bpf_dynptr_from_xdp,
+	KF_bpf_dynptr_from_skb_meta,
+	KF_bpf_xdp_pull_data,
 	KF_bpf_dynptr_slice,
 	KF_bpf_dynptr_slice_rdwr,
 	KF_bpf_dynptr_clone,
@@ -12252,6 +12259,8 @@ enum special_kfunc_type {
 	KF_bpf_res_spin_lock_irqsave,
 	KF_bpf_res_spin_unlock_irqrestore,
 	KF___bpf_trap,
+	KF_bpf_task_work_schedule_signal,
+	KF_bpf_task_work_schedule_resume,
 };
 
 BTF_ID_LIST(special_kfunc_list)
@@ -12277,9 +12286,13 @@ BTF_ID(func, bpf_rbtree_right)
 #ifdef CONFIG_NET
 BTF_ID(func, bpf_dynptr_from_skb)
 BTF_ID(func, bpf_dynptr_from_xdp)
+BTF_ID(func, bpf_dynptr_from_skb_meta)
+BTF_ID(func, bpf_xdp_pull_data)
 #else
 BTF_ID_UNUSED
 BTF_ID_UNUSED
+BTF_ID_UNUSED
+BTF_ID_UNUSED
 #endif
 BTF_ID(func, bpf_dynptr_slice)
 BTF_ID(func, bpf_dynptr_slice_rdwr)
@@ -12318,6 +12331,14 @@ BTF_ID(func, bpf_res_spin_unlock)
 BTF_ID(func, bpf_res_spin_lock_irqsave)
 BTF_ID(func, bpf_res_spin_unlock_irqrestore)
 BTF_ID(func, __bpf_trap)
+BTF_ID(func, bpf_task_work_schedule_signal)
+BTF_ID(func, bpf_task_work_schedule_resume)
+
+static bool is_task_work_add_kfunc(u32 func_id)
+{
+	return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
+	       func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
+}
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -12349,6 +12370,11 @@ static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
 }
 
+static bool is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
+}
+
 static enum kfunc_ptr_arg_type
 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
 		       struct bpf_kfunc_call_arg_meta *meta,
@@ -12408,6 +12434,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
 	if (is_kfunc_arg_wq(meta->btf, &args[argno]))
 		return KF_ARG_PTR_TO_WORKQUEUE;
 
+	if (is_kfunc_arg_task_work(meta->btf, &args[argno]))
+		return KF_ARG_PTR_TO_TASK_WORK;
+
 	if (is_kfunc_arg_irq_flag(meta->btf, &args[argno]))
 		return KF_ARG_PTR_TO_IRQ_FLAG;
 
@@ -12751,7 +12780,8 @@ static bool is_sync_callback_calling_kfunc(u32 btf_id)
 
 static bool is_async_callback_calling_kfunc(u32 btf_id)
 {
-	return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
+	return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl] ||
+	       is_task_work_add_kfunc(btf_id);
 }
 
 static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
@@ -13132,7 +13162,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 				verbose(env, "pointer in R%d isn't map pointer\n", regno);
 				return -EINVAL;
 			}
-			if (meta->map.ptr && reg->map_ptr->record->wq_off >= 0) {
+			if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 ||
+					      reg->map_ptr->record->task_work_off >= 0)) {
 				/* Use map_uid (which is unique id of inner map) to reject:
 				 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
 				 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
@@ -13147,6 +13178,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 				 */
 				if (meta->map.ptr != reg->map_ptr ||
 				    meta->map.uid != reg->map_uid) {
+					if (reg->map_ptr->record->task_work_off >= 0) {
+						verbose(env,
+							"bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n",
+							meta->map.uid, reg->map_uid);
+						return -EINVAL;
+					}
 					verbose(env,
 						"workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
 						meta->map.uid, reg->map_uid);
@@ -13185,6 +13222,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
 		case KF_ARG_PTR_TO_CONST_STR:
 		case KF_ARG_PTR_TO_WORKQUEUE:
+		case KF_ARG_PTR_TO_TASK_WORK:
 		case KF_ARG_PTR_TO_IRQ_FLAG:
 		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
 			break;
@@ -13253,6 +13291,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 				dynptr_arg_type |= DYNPTR_TYPE_SKB;
 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
 				dynptr_arg_type |= DYNPTR_TYPE_XDP;
+			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) {
+				dynptr_arg_type |= DYNPTR_TYPE_SKB_META;
 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
 				   (dynptr_arg_type & MEM_UNINIT)) {
 				enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
@@ -13476,6 +13516,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 			if (ret < 0)
 				return ret;
 			break;
+		case KF_ARG_PTR_TO_TASK_WORK:
+			if (reg->type != PTR_TO_MAP_VALUE) {
+				verbose(env, "arg#%d doesn't point to a map value\n", i);
+				return -EINVAL;
+			}
+			ret = process_task_work_func(env, regno, meta);
+			if (ret < 0)
+				return ret;
+			break;
 		case KF_ARG_PTR_TO_IRQ_FLAG:
 			if (reg->type != PTR_TO_STACK) {
 				verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i);
@@ -13842,6 +13891,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		}
 	}
 
+	if (is_task_work_add_kfunc(meta.func_id)) {
+		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+					 set_task_work_schedule_callback_state);
+		if (err) {
+			verbose(env, "kfunc %s#%d failed callback verification\n",
+				func_name, meta.func_id);
+			return err;
+		}
+	}
+
 	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
 	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
 
@@ -13901,6 +13960,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		return -EACCES;
 	}
 
+	if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) {
+		verbose(env, "kernel func %s requires RCU critical section protection\n", func_name);
+		return -EACCES;
+	}
+
 	/* In case of release function, we get register number of refcounted
 	 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
 	 */
@@ -14014,6 +14078,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			/* Ensures we don't access the memory after a release_reference() */
 			if (meta.ref_obj_id)
 				regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+
+			if (is_kfunc_rcu_protected(&meta))
+				regs[BPF_REG_0].type |= MEM_RCU;
 		} else {
 			mark_reg_known_zero(env, regs, BPF_REG_0);
 			regs[BPF_REG_0].btf = desc_btf;
@@ -14022,6 +14089,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
 			if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
 				regs[BPF_REG_0].type |= PTR_UNTRUSTED;
+			else if (is_kfunc_rcu_protected(&meta))
+				regs[BPF_REG_0].type |= MEM_RCU;
 
 			if (is_iter_next_kfunc(&meta)) {
 				struct bpf_reg_state *cur_iter;
@@ -14066,6 +14135,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		}
 	}
 
+	if (is_kfunc_pkt_changing(&meta))
+		clear_all_pkt_pointers(env);
+
 	nargs = btf_type_vlen(meta.func_proto);
 	args = (const struct btf_param *)(meta.func_proto + 1);
 	for (i = 0; i < nargs; i++) {
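
With the three MEM_RCU hunks above, a kfunc marked RCU-protected may only be called inside an RCU critical section, and its returned pointer comes back tagged MEM_RCU, i.e. valid only until the section ends. The program-side pattern uses the existing bpf_rcu_read_lock()/bpf_rcu_read_unlock() kfuncs; the protected kfunc and its return type below are stand-ins, not names from this patch:

extern void bpf_rcu_read_lock(void) __ksym;
extern void bpf_rcu_read_unlock(void) __ksym;
/* stand-in for any RCU-protected kfunc */
extern struct some_obj *lookup_obj_rcu(int id) __ksym;

SEC("tc")
int prog(struct __sk_buff *skb)
{
	struct some_obj *obj;

	bpf_rcu_read_lock();
	obj = lookup_obj_rcu(1);	/* R0 is PTR_TO_BTF_ID | MEM_RCU */
	if (obj) {
		/* safe to dereference here */
	}
	bpf_rcu_read_unlock();		/* obj must not be used past this */
	return 0;
}
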
@@ -15645,7 +15717,6 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 				 */
 				assign_scalar_id_before_mov(env, src_reg);
 				copy_register_state(dst_reg, src_reg);
-				dst_reg->live |= REG_LIVE_WRITTEN;
 				dst_reg->subreg_def = DEF_NOT_SUBREG;
 			} else {
 				/* case: R1 = (s8, s16 s32)R2 */
@@ -15664,7 +15735,6 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 					if (!no_sext)
 						dst_reg->id = 0;
 					coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
-					dst_reg->live |= REG_LIVE_WRITTEN;
 					dst_reg->subreg_def = DEF_NOT_SUBREG;
 				} else {
 					mark_reg_unknown(env, regs, insn->dst_reg);
@@ -15690,7 +15760,6 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 					 */
 					if (!is_src_reg_u32)
 						dst_reg->id = 0;
-					dst_reg->live |= REG_LIVE_WRITTEN;
 					dst_reg->subreg_def = env->insn_idx + 1;
 				} else {
 					/* case: W1 = (s8, s16)W2 */
@@ -15701,7 +15770,6 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 					copy_register_state(dst_reg, src_reg);
 					if (!no_sext)
 						dst_reg->id = 0;
-					dst_reg->live |= REG_LIVE_WRITTEN;
 					dst_reg->subreg_def = env->insn_idx + 1;
 					coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
 				}
@@ -15886,6 +15954,8 @@ static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_sta
 		 */
 		if (tnum_is_const(t1) && tnum_is_const(t2))
 			return t1.value == t2.value;
+		if (!tnum_overlap(t1, t2))
+			return 0; /* non-overlapping ranges */
 		if (umin1 > umax2 || umax1 < umin2)
 			return 0;
@@ -15910,6 +15980,8 @@ static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_sta
 		 */
 		if (tnum_is_const(t1) && tnum_is_const(t2))
 			return t1.value != t2.value;
+		if (!tnum_overlap(t1, t2))
+			return 1; /* non-overlapping ranges */
 		if (umin1 > umax2 || umax1 < umin2)
 			return 1;
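
tnum_overlap() gives is_scalar_branch_taken() a disproof that is independent of the [umin, umax] range check: two registers' ranges can overlap while their known bits already exclude any common value. One way to implement the check (the kernel's version lives in kernel/bpf/tnum.c; this standalone sketch only mirrors its semantics):

#include <stdbool.h>
#include <stdint.h>

struct tnum { uint64_t value; uint64_t mask; };	/* set mask bit = unknown bit */

/* Two tnums share at least one concrete value iff they agree on every
 * bit that is known in both.
 */
static bool tnum_overlap(struct tnum a, struct tnum b)
{
	uint64_t mu = a.mask | b.mask;	/* bits unknown in either operand */

	return (a.value & ~mu) == (b.value & ~mu);
}

/* Example: a = {value 0x0, mask 0xe} is {0,2,4,...,14}, range [0,14];
 * b = constant 1, which lies inside [0,14]. Bit 0 is known 0 in a and
 * known 1 in b, so (a.value & ~mu) = 0 != 1 = (b.value & ~mu): no
 * overlap, and a JEQ between them is known not-taken even though the
 * plain range test cannot tell.
 */
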
@@ -17117,9 +17189,8 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 	}
 
 	if (frame->in_async_callback_fn) {
-		/* enforce return zero from async callbacks like timer */
 		exit_ctx = "At async callback return";
-		range = retval_range(0, 0);
+		range = frame->callback_ret_range;
 		goto enforce_retval;
 	}
 
@@ -17258,7 +17329,7 @@ static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off)
 {
 	struct bpf_subprog_info *subprog;
 
-	subprog = find_containing_subprog(env, off);
+	subprog = bpf_find_containing_subprog(env, off);
 	subprog->changes_pkt_data = true;
 }
 
@@ -17266,7 +17337,7 @@ static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off)
 {
 	struct bpf_subprog_info *subprog;
 
-	subprog = find_containing_subprog(env, off);
+	subprog = bpf_find_containing_subprog(env, off);
 	subprog->might_sleep = true;
 }
 
@@ -17280,8 +17351,8 @@ static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w)
 {
 	struct bpf_subprog_info *caller, *callee;
 
-	caller = find_containing_subprog(env, t);
-	callee = find_containing_subprog(env, w);
+	caller = bpf_find_containing_subprog(env, t);
+	callee = bpf_find_containing_subprog(env, w);
 	caller->changes_pkt_data |= callee->changes_pkt_data;
 	caller->might_sleep |= callee->might_sleep;
 }
@@ -17351,7 +17422,7 @@ static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
 	env->insn_aux_data[idx].calls_callback = true;
 }
 
-static bool calls_callback(struct bpf_verifier_env *env, int insn_idx)
+bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx)
 {
 	return env->insn_aux_data[insn_idx].calls_callback;
 }
@@ -17783,6 +17854,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
 			 */
 			if (ret == 0 && is_kfunc_sleepable(&meta))
 				mark_subprog_might_sleep(env, t);
+			if (ret == 0 && is_kfunc_pkt_changing(&meta))
+				mark_subprog_changes_pkt_data(env, t);
 		}
 		return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
@@ -17825,7 +17898,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
 static int check_cfg(struct bpf_verifier_env *env)
 {
 	int insn_cnt = env->prog->len;
-	int *insn_stack, *insn_state, *insn_postorder;
+	int *insn_stack, *insn_state;
 	int ex_insn_beg, i, ret = 0;
 
 	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
@@ -17838,14 +17911,6 @@ static int check_cfg(struct bpf_verifier_env *env)
 		return -ENOMEM;
 	}
 
-	insn_postorder = env->cfg.insn_postorder =
-		kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
-	if (!insn_postorder) {
-		kvfree(insn_state);
-		kvfree(insn_stack);
-		return -ENOMEM;
-	}
-
 	ex_insn_beg = env->exception_callback_subprog ?
 		      env->subprog_info[env->exception_callback_subprog].start : 0;
 
@@ -17863,7 +17928,6 @@ walk_cfg:
 		case DONE_EXPLORING:
 			insn_state[t] = EXPLORED;
 			env->cfg.cur_stack--;
-			insn_postorder[env->cfg.cur_postorder++] = t;
 			break;
 		case KEEP_EXPLORING:
 			break;
@@ -17917,6 +17981,56 @@ err_free:
 	return ret;
 }
 
+/*
+ * For each subprogram 'i' fill array env->cfg.insn_subprogram sub-range
+ * [env->subprog_info[i].postorder_start, env->subprog_info[i+1].postorder_start)
+ * with indices of 'i' instructions in postorder.
+ */
+static int compute_postorder(struct bpf_verifier_env *env)
+{
+	u32 cur_postorder, i, top, stack_sz, s, succ_cnt, succ[2];
+	int *stack = NULL, *postorder = NULL, *state = NULL;
+
+	postorder = kvcalloc(env->prog->len, sizeof(int), GFP_KERNEL_ACCOUNT);
+	state = kvcalloc(env->prog->len, sizeof(int), GFP_KERNEL_ACCOUNT);
+	stack = kvcalloc(env->prog->len, sizeof(int), GFP_KERNEL_ACCOUNT);
+	if (!postorder || !state || !stack) {
+		kvfree(postorder);
+		kvfree(state);
+		kvfree(stack);
+		return -ENOMEM;
+	}
+	cur_postorder = 0;
+	for (i = 0; i < env->subprog_cnt; i++) {
+		env->subprog_info[i].postorder_start = cur_postorder;
+		stack[0] = env->subprog_info[i].start;
+		stack_sz = 1;
+		do {
+			top = stack[stack_sz - 1];
+			state[top] |= DISCOVERED;
+			if (state[top] & EXPLORED) {
+				postorder[cur_postorder++] = top;
+				stack_sz--;
+				continue;
+			}
+			succ_cnt = bpf_insn_successors(env->prog, top, succ);
+			for (s = 0; s < succ_cnt; ++s) {
+				if (!state[succ[s]]) {
+					stack[stack_sz++] = succ[s];
+					state[succ[s]] |= DISCOVERED;
+				}
+			}
+			state[top] |= EXPLORED;
+		} while (stack_sz);
+	}
+	env->subprog_info[i].postorder_start = cur_postorder;
+	env->cfg.insn_postorder = postorder;
+	env->cfg.cur_postorder = cur_postorder;
+	kvfree(stack);
+	kvfree(state);
+	return 0;
+}
+
 static int check_abnormal_return(struct bpf_verifier_env *env)
 {
 	int i;
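
A short trace of compute_postorder() on a made-up subprogram (instruction indices and jump targets invented for illustration):

/*   0: if r1 > 10 goto +2	successors: 1, 3
 *   1: r0 = 1			successor:  2
 *   2: exit
 *   3: r0 = 2			successor:  4
 *   4: exit
 *
 * The stack grows 0 -> {1, 3}; insn 3's arm is explored first (last
 * pushed), then insn 1's arm, and each insn is emitted only once all
 * its successors are EXPLORED. Resulting postorder: 4, 3, 2, 1, 0.
 * A second subprogram restarts the walk at its first insn and appends
 * to the same array, so subprog_info[i].postorder_start and
 * subprog_info[i+1].postorder_start delimit subprogram i's sub-range.
 */
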
@@ -18494,25 +18608,23 @@ static void clean_verifier_state(struct bpf_verifier_env *env, * but a lot of states will get revised from liveness point of view when * the verifier explores other branches. * Example: - * 1: r0 = 1 + * 1: *(u64)(r10 - 8) = 1 * 2: if r1 == 100 goto pc+1 - * 3: r0 = 2 - * 4: exit - * when the verifier reaches exit insn the register r0 in the state list of - * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch - * of insn 2 and goes exploring further. At the insn 4 it will walk the - * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ. + * 3: *(u64)(r10 - 8) = 2 + * 4: r0 = *(u64)(r10 - 8) + * 5: exit + * when the verifier reaches exit insn the stack slot -8 in the state list of + * insn 2 is not yet marked alive. Then the verifier pops the other_branch + * of insn 2 and goes exploring further. After the insn 4 read, liveness + * analysis would propagate read mark for -8 at insn 2. * * Since the verifier pushes the branch states as it sees them while exploring * the program the condition of walking the branch instruction for the second * time means that all states below this branch were already explored and * their final liveness marks are already propagated. * Hence when the verifier completes the search of state list in is_state_visited() - * we can call this clean_live_states() function to mark all liveness states - * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state' - * will not be used. - * This function also clears the registers and stack for states that !READ - * to simplify state merging. + * we can call this clean_live_states() function to clear dead the registers and stack + * slots to simplify state merging. * * Important note here that walking the same branch instruction in the callee * doesn't meant that the states are DONE. The verifier has to compare @@ -18532,7 +18644,7 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, if (sl->state.insn_idx != insn || !same_callsites(&sl->state, cur)) continue; - if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) + if (sl->state.cleaned) /* all regs in this state in all frames were already marked */ continue; if (incomplete_read_marks(env, &sl->state)) @@ -18564,9 +18676,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, if (exact == EXACT) return regs_exact(rold, rcur, idmap); - if (!(rold->live & REG_LIVE_READ) && exact == NOT_EXACT) - /* explored state didn't use this */ - return true; if (rold->type == NOT_INIT) { if (exact == NOT_EXACT || rcur->type == NOT_INIT) /* explored state can't have used this */ @@ -18690,7 +18799,6 @@ static struct bpf_reg_state unbound_reg; static __init int unbound_reg_init(void) { __mark_reg_unknown_imprecise(&unbound_reg); - unbound_reg.live |= REG_LIVE_READ; return 0; } late_initcall(unbound_reg_init); @@ -18743,13 +18851,6 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, cur->stack[spi].slot_type[i % BPF_REG_SIZE])) return false; - if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) - && exact == NOT_EXACT) { - i += BPF_REG_SIZE - 1; - /* explored state didn't use this */ - continue; - } - if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) continue; @@ -18992,91 +19093,6 @@ static bool states_equal(struct bpf_verifier_env *env, return true; } -/* Return 0 if no propagation happened. Return negative error code if error - * happened. Otherwise, return the propagated bit. 
- */ -static int propagate_liveness_reg(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, - struct bpf_reg_state *parent_reg) -{ - u8 parent_flag = parent_reg->live & REG_LIVE_READ; - u8 flag = reg->live & REG_LIVE_READ; - int err; - - /* When comes here, read flags of PARENT_REG or REG could be any of - * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need - * of propagation if PARENT_REG has strongest REG_LIVE_READ64. - */ - if (parent_flag == REG_LIVE_READ64 || - /* Or if there is no read flag from REG. */ - !flag || - /* Or if the read flag from REG is the same as PARENT_REG. */ - parent_flag == flag) - return 0; - - err = mark_reg_read(env, reg, parent_reg, flag); - if (err) - return err; - - return flag; -} - -/* A write screens off any subsequent reads; but write marks come from the - * straight-line code between a state and its parent. When we arrive at an - * equivalent state (jump target or such) we didn't arrive by the straight-line - * code, so read marks in the state must propagate to the parent regardless - * of the state's write marks. That's what 'parent == state->parent' comparison - * in mark_reg_read() is for. - */ -static int propagate_liveness(struct bpf_verifier_env *env, - const struct bpf_verifier_state *vstate, - struct bpf_verifier_state *vparent, - bool *changed) -{ - struct bpf_reg_state *state_reg, *parent_reg; - struct bpf_func_state *state, *parent; - int i, frame, err = 0; - bool tmp = false; - - changed = changed ?: &tmp; - if (vparent->curframe != vstate->curframe) { - WARN(1, "propagate_live: parent frame %d current frame %d\n", - vparent->curframe, vstate->curframe); - return -EFAULT; - } - /* Propagate read liveness of registers... */ - BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); - for (frame = 0; frame <= vstate->curframe; frame++) { - parent = vparent->frame[frame]; - state = vstate->frame[frame]; - parent_reg = parent->regs; - state_reg = state->regs; - /* We don't need to worry about FP liveness, it's read-only */ - for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { - err = propagate_liveness_reg(env, &state_reg[i], - &parent_reg[i]); - if (err < 0) - return err; - *changed |= err > 0; - if (err == REG_LIVE_READ64) - mark_insn_zext(env, &parent_reg[i]); - } - - /* Propagate stack slots. 
*/ - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && - i < parent->allocated_stack / BPF_REG_SIZE; i++) { - parent_reg = &parent->stack[i].spilled_ptr; - state_reg = &state->stack[i].spilled_ptr; - err = propagate_liveness_reg(env, state_reg, - parent_reg); - *changed |= err > 0; - if (err < 0) - return err; - } - } - return 0; -} - /* find precise scalars in the previous equivalent state and * propagate them into the current state */ @@ -19096,8 +19112,7 @@ static int propagate_precision(struct bpf_verifier_env *env, first = true; for (i = 0; i < BPF_REG_FP; i++, state_reg++) { if (state_reg->type != SCALAR_VALUE || - !state_reg->precise || - !(state_reg->live & REG_LIVE_READ)) + !state_reg->precise) continue; if (env->log.level & BPF_LOG_LEVEL2) { if (first) @@ -19114,8 +19129,7 @@ static int propagate_precision(struct bpf_verifier_env *env, continue; state_reg = &state->stack[i].spilled_ptr; if (state_reg->type != SCALAR_VALUE || - !state_reg->precise || - !(state_reg->live & REG_LIVE_READ)) + !state_reg->precise) continue; if (env->log.level & BPF_LOG_LEVEL2) { if (first) @@ -19165,9 +19179,6 @@ static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visi changed = false; for (backedge = visit->backedges; backedge; backedge = backedge->next) { st = &backedge->state; - err = propagate_liveness(env, st->equal_state, st, &changed); - if (err) - return err; err = propagate_precision(env, st->equal_state, st, &changed); if (err) return err; @@ -19191,7 +19202,7 @@ static bool states_maybe_looping(struct bpf_verifier_state *old, fcur = cur->frame[fr]; for (i = 0; i < MAX_BPF_REG; i++) if (memcmp(&fold->regs[i], &fcur->regs[i], - offsetof(struct bpf_reg_state, parent))) + offsetof(struct bpf_reg_state, frameno))) return false; return true; } @@ -19289,7 +19300,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state_list *sl; struct bpf_verifier_state *cur = env->cur_state, *new; bool force_new_state, add_new_state, loop; - int i, j, n, err, states_cnt = 0; + int n, err, states_cnt = 0; struct list_head *pos, *tmp, *head; force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) || @@ -19404,7 +19415,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) goto hit; } } - if (calls_callback(env, insn_idx)) { + if (bpf_calls_callback(env, insn_idx)) { if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) goto hit; goto skip_inf_loop_check; @@ -19447,25 +19458,15 @@ skip_inf_loop_check: if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) { hit: sl->hit_cnt++; - /* reached equivalent register/stack state, - * prune the search. - * Registers read by the continuation are read by us. - * If we have any write marks in env->cur_state, they - * will prevent corresponding reads in the continuation - * from reaching our parent (an explored_state). Our - * own state will get the read marks recorded, but - * they'll be immediately forgotten as we're pruning - * this state and will pop a new one. - */ - err = propagate_liveness(env, &sl->state, cur, NULL); /* if previous state reached the exit with precision and * current state is equivalent to it (except precision marks) * the precision needs to be propagated back in * the current state. */ + err = 0; if (is_jmp_point(env, env->insn_idx)) - err = err ? : push_jmp_history(env, cur, 0, 0); + err = push_jmp_history(env, cur, 0, 0); err = err ? 
: propagate_precision(env, &sl->state, cur, NULL); if (err) return err; @@ -19553,7 +19554,7 @@ hit: err = err ?: add_scc_backedge(env, &sl->state, backedge); if (err) { free_verifier_state(&backedge->state, false); - kvfree(backedge); + kfree(backedge); return err; } } @@ -19636,7 +19637,7 @@ miss: err = maybe_enter_scc(env, new); if (err) { free_verifier_state(new, false); - kvfree(new_sl); + kfree(new_sl); return err; } @@ -19645,38 +19646,6 @@ miss: cur->dfs_depth = new->dfs_depth + 1; clear_jmp_history(cur); list_add(&new_sl->node, head); - - /* connect new state to parentage chain. Current frame needs all - * registers connected. Only r6 - r9 of the callers are alive (pushed - * to the stack implicitly by JITs) so in callers' frames connect just - * r6 - r9 as an optimization. Callers will have r1 - r5 connected to - * the state of the call instruction (with WRITTEN set), and r0 comes - * from callee with its full parentage chain, anyway. - */ - /* clear write marks in current state: the writes we did are not writes - * our child did, so they don't screen off its reads from us. - * (There are no read marks in current state, because reads always mark - * their parent and current state never has children yet. Only - * explored_states can get read marks.) - */ - for (j = 0; j <= cur->curframe; j++) { - for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) - cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; - for (i = 0; i < BPF_REG_FP; i++) - cur->frame[j]->regs[i].live = REG_LIVE_NONE; - } - - /* all stack frames are accessible from callee, clear them all */ - for (j = 0; j <= cur->curframe; j++) { - struct bpf_func_state *frame = cur->frame[j]; - struct bpf_func_state *newframe = new->frame[j]; - - for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) { - frame->stack[i].spilled_ptr.live = REG_LIVE_NONE; - frame->stack[i].spilled_ptr.parent = - &newframe->stack[i].spilled_ptr; - } - } return 0; } @@ -19812,6 +19781,9 @@ static int process_bpf_exit_full(struct bpf_verifier_env *env, return PROCESS_BPF_EXIT; if (env->cur_state->curframe) { + err = bpf_update_live_stack(env); + if (err) + return err; /* exit from nested function */ err = prepare_func_exit(env, &env->insn_idx); if (err) @@ -19997,7 +19969,7 @@ static int do_check(struct bpf_verifier_env *env) for (;;) { struct bpf_insn *insn; struct bpf_insn_aux_data *insn_aux; - int err; + int err, marks_err; /* reset current history entry on each new instruction */ env->cur_hist_ent = NULL; @@ -20090,7 +20062,15 @@ static int do_check(struct bpf_verifier_env *env) if (state->speculative && insn_aux->nospec) goto process_bpf_exit; + err = bpf_reset_stack_write_marks(env, env->insn_idx); + if (err) + return err; err = do_check_insn(env, &do_print_state); + if (err >= 0 || error_recoverable_with_nospec(err)) { + marks_err = bpf_commit_stack_write_marks(env); + if (marks_err) + return marks_err; + } if (error_recoverable_with_nospec(err) && state->speculative) { /* Prevent this speculative path from ever reaching the * insn that would have been unsafe to execute. 
@@ -20131,6 +20111,9 @@ process_bpf_exit:
 			err = update_branch_counts(env, env->cur_state);
 			if (err)
 				return err;
+			err = bpf_update_live_stack(env);
+			if (err)
+				return err;
 			err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
 					pop_log);
 			if (err < 0) {
@@ -20193,8 +20176,11 @@ static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
 		if (env->used_btfs[i].btf == btf)
 			return i;
 
-	if (env->used_btf_cnt >= MAX_USED_BTFS)
+	if (env->used_btf_cnt >= MAX_USED_BTFS) {
+		verbose(env, "The total number of btfs per program has reached the limit of %u\n",
+			MAX_USED_BTFS);
 		return -E2BIG;
+	}
 
 	btf_get(btf);
@@ -20360,6 +20346,12 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 {
 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
 
+	if (map->excl_prog_sha &&
+	    memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) {
+		verbose(env, "program's hash doesn't match map's excl_prog_hash\n");
+		return -EACCES;
+	}
+
 	if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
 	    btf_record_has_field(map->record, BPF_RB_ROOT)) {
 		if (is_tracing_prog_type(prog_type)) {
@@ -20699,12 +20691,11 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
  * [0, off) and [off, end) to new locations, so the patched range stays zero
  */
 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
-				 struct bpf_insn_aux_data *new_data,
 				 struct bpf_prog *new_prog, u32 off, u32 cnt)
 {
-	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
+	struct bpf_insn_aux_data *data = env->insn_aux_data;
 	struct bpf_insn *insn = new_prog->insnsi;
-	u32 old_seen = old_data[off].seen;
+	u32 old_seen = data[off].seen;
 	u32 prog_len;
 	int i;
@@ -20712,22 +20703,20 @@ static void adjust_insn_aux_data(struct bpf_verifier_env *env,
 	 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
 	 * original insn at old prog.
 	 */
-	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
+	data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
 
 	if (cnt == 1)
 		return;
 
 	prog_len = new_prog->len;
-	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
-	memcpy(new_data + off + cnt - 1, old_data + off,
-	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+	memmove(data + off + cnt - 1, data + off,
+		sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+	memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
 	for (i = off; i < off + cnt - 1; i++) {
 		/* Expand insni[off]'s seen count to the patched range.
 		 */
-		new_data[i].seen = old_seen;
-		new_data[i].zext_dst = insn_has_def32(env, insn + i);
+		data[i].seen = old_seen;
+		data[i].zext_dst = insn_has_def32(insn + i);
 	}
-	env->insn_aux_data = new_data;
-	vfree(old_data);
 }
 
 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
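adjust_insn_aux_data() previously copied old_data into a freshly vzalloc'ed array; since bpf_patch_insn_data() now grows the buffer with vrealloc() up front, the tail can be shifted in place. memmove() is required because source and destination overlap, and the vacated gap is zeroed for the newly patched instructions. The same pattern, reduced to a standalone demonstration with int slots standing in for struct bpf_insn_aux_data:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Patching the insn at off == 2 into cnt == 3 insns must open
	 * a gap of cnt - 1 == 2 zeroed slots inside the aux array. */
	int aux[8] = { 10, 11, 12, 13, 14, 0, 0, 0 };
	int off = 2, cnt = 3, old_len = 5;

	/* Regions overlap, so memmove(), not memcpy(). */
	memmove(aux + off + cnt - 1, aux + off,
		sizeof(aux[0]) * (old_len - off));
	/* Zero the gap; patched insns start with clean aux data. */
	memset(aux + off, 0, sizeof(aux[0]) * (cnt - 1));

	for (int i = 0; i < old_len + cnt - 1; i++)
		printf("%d ", aux[i]);	/* prints: 10 11 0 0 12 13 14 */
	printf("\n");
	return 0;
}

Note that old_len - off here equals the kernel's prog_len - off - cnt + 1, since prog_len is the already-grown length.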
@@ -20765,10 +20754,14 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 	struct bpf_insn_aux_data *new_data = NULL;
 
 	if (len > 1) {
-		new_data = vzalloc(array_size(env->prog->len + len - 1,
-					      sizeof(struct bpf_insn_aux_data)));
+		new_data = vrealloc(env->insn_aux_data,
+				    array_size(env->prog->len + len - 1,
+					       sizeof(struct bpf_insn_aux_data)),
+				    GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 		if (!new_data)
 			return NULL;
+
+		env->insn_aux_data = new_data;
 	}
 
 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
@@ -20777,10 +20770,9 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 			verbose(env,
 				"insn %d cannot be patched due to 16-bit range\n",
 				env->insn_aux_data[off].orig_idx);
-		vfree(new_data);
 		return NULL;
 	}
-	adjust_insn_aux_data(env, new_data, new_prog, off, len);
+	adjust_insn_aux_data(env, new_prog, off, len);
 	adjust_subprog_starts(env, off, len);
 	adjust_poke_descs(new_prog, off, len);
 	return new_prog;
@@ -21131,7 +21123,7 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
 			 * BPF_STX + SRC_OP, so it is safe to pass NULL
 			 * here.
 			 */
-			if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
+			if (is_reg64(&insn, load_reg, NULL, DST_OP)) {
 				if (class == BPF_LD &&
 				    BPF_MODE(code) == BPF_IMM)
 					i++;
@@ -21400,10 +21392,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			continue;
 		case PTR_TO_ARENA:
 			if (BPF_MODE(insn->code) == BPF_MEMSX) {
-				verbose(env, "sign extending loads from arena are not supported yet\n");
-				return -EOPNOTSUPP;
+				if (!bpf_jit_supports_insn(insn, true)) {
+					verbose(env, "sign extending loads from arena are not supported yet\n");
+					return -EOPNOTSUPP;
+				}
+				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
+			} else {
+				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
 			}
-			insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
 			env->prog->aux->num_exentries++;
 			continue;
 		default:
@@ -21578,6 +21574,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
 		func[i]->aux->poke_tab = prog->aux->poke_tab;
 		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
+		func[i]->aux->main_prog_aux = prog->aux;
 
 		for (j = 0; j < prog->aux->size_poke_tab; j++) {
 			struct bpf_jit_poke_descriptor *poke;
@@ -21608,6 +21605,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 			if (BPF_CLASS(insn->code) == BPF_LDX &&
 			    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
 			     BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
+			     BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
 			     BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
 				num_exentries++;
 			if ((BPF_CLASS(insn->code) == BPF_STX ||
@@ -23855,6 +23853,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 BTF_SET_START(btf_id_deny)
 BTF_ID_UNUSED
 #ifdef CONFIG_SMP
+BTF_ID(func, ___migrate_enable)
 BTF_ID(func, migrate_disable)
 BTF_ID(func, migrate_enable)
 #endif
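In the convert_ctx_accesses() hunk above, an arena BPF_MEMSX load is rewritten by replacing only the mode bits of insn->code while preserving class and size via the BPF_CLASS()/BPF_SIZE() extractors. A sketch of that bit surgery using the UAPI field layout (BPF_PROBE_MEM32SX is kernel-internal; its value below is a placeholder, not the real definition):

#include <stdio.h>

/* Field extractors, as in the BPF UAPI headers. */
#define BPF_CLASS(code) ((code) & 0x07)
#define BPF_SIZE(code)  ((code) & 0x18)
#define BPF_MODE(code)  ((code) & 0xe0)

#define BPF_LDX   0x01
#define BPF_W     0x00
#define BPF_MEMSX 0x80			/* sign-extending load */
#define BPF_PROBE_MEM32SX 0xa0		/* placeholder value for illustration */

int main(void)
{
	unsigned int code = BPF_LDX | BPF_W | BPF_MEMSX;

	/* Keep class and size, swap only the mode bits. */
	code = BPF_CLASS(code) | BPF_PROBE_MEM32SX | BPF_SIZE(code);
	printf("rewritten opcode: %#x\n", code);
	return 0;
}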
@@ -24084,67 +24083,6 @@ static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr,
 	return 0;
 }
 
-static bool can_fallthrough(struct bpf_insn *insn)
-{
-	u8 class = BPF_CLASS(insn->code);
-	u8 opcode = BPF_OP(insn->code);
-
-	if (class != BPF_JMP && class != BPF_JMP32)
-		return true;
-
-	if (opcode == BPF_EXIT || opcode == BPF_JA)
-		return false;
-
-	return true;
-}
-
-static bool can_jump(struct bpf_insn *insn)
-{
-	u8 class = BPF_CLASS(insn->code);
-	u8 opcode = BPF_OP(insn->code);
-
-	if (class != BPF_JMP && class != BPF_JMP32)
-		return false;
-
-	switch (opcode) {
-	case BPF_JA:
-	case BPF_JEQ:
-	case BPF_JNE:
-	case BPF_JLT:
-	case BPF_JLE:
-	case BPF_JGT:
-	case BPF_JGE:
-	case BPF_JSGT:
-	case BPF_JSGE:
-	case BPF_JSLT:
-	case BPF_JSLE:
-	case BPF_JCOND:
-	case BPF_JSET:
-		return true;
-	}
-
-	return false;
-}
-
-static int insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2])
-{
-	struct bpf_insn *insn = &prog->insnsi[idx];
-	int i = 0, insn_sz;
-	u32 dst;
-
-	insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
-	if (can_fallthrough(insn) && idx + 1 < prog->len)
-		succ[i++] = idx + insn_sz;
-
-	if (can_jump(insn)) {
-		dst = idx + jmp_offset(insn) + 1;
-		if (i == 0 || succ[0] != dst)
-			succ[i++] = dst;
-	}
-
-	return i;
-}
-
 /* Each field is a register bitmask */
 struct insn_live_regs {
 	u16 use; /* registers read by instruction */
@@ -24342,7 +24280,7 @@ static int compute_live_registers(struct bpf_verifier_env *env)
 			u16 new_out = 0;
 			u16 new_in = 0;
 
-			succ_num = insn_successors(env->prog, insn_idx, succ);
+			succ_num = bpf_insn_successors(env->prog, insn_idx, succ);
 			for (int s = 0; s < succ_num; ++s)
 				new_out |= state[succ[s]].in;
 			new_in = (new_out & ~live->def) | live->use;
@@ -24379,9 +24317,6 @@ static int compute_live_registers(struct bpf_verifier_env *env)
 out:
 	kvfree(state);
-	kvfree(env->cfg.insn_postorder);
-	env->cfg.insn_postorder = NULL;
-	env->cfg.cur_postorder = 0;
 	return err;
 }
@@ -24511,7 +24446,7 @@ dfs_continue:
 			stack[stack_sz++] = w;
 		}
 		/* Visit 'w' successors */
-		succ_cnt = insn_successors(env->prog, w, succ);
+		succ_cnt = bpf_insn_successors(env->prog, w, succ);
 		for (j = 0; j < succ_cnt; ++j) {
 			if (pre[succ[j]]) {
 				low[w] = min(low[w], low[succ[j]]);
@@ -24684,6 +24619,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret < 0)
 		goto skip_full_check;
 
+	ret = compute_postorder(env);
+	if (ret < 0)
+		goto skip_full_check;
+
+	ret = bpf_stack_liveness_init(env);
+	if (ret)
+		goto skip_full_check;
+
 	ret = check_attach_btf_id(env);
 	if (ret)
 		goto skip_full_check;
@@ -24833,6 +24776,7 @@ err_unlock:
 		mutex_unlock(&bpf_verifier_lock);
 	vfree(env->insn_aux_data);
 err_free_env:
+	bpf_stack_liveness_free(env);
 	kvfree(env->cfg.insn_postorder);
 	kvfree(env->scc_info);
 	kvfree(env);
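Taken together, the bpf_check() and do_check() hunks pin down a fixed lifecycle for the new stack-liveness machinery: initialize after CFG post-order computation, bracket every instruction, flush on frame exit or state pop, and release in the common cleanup path. A compilable skeleton of that ordering (stub functions standing in for the real entry points, which take a struct bpf_verifier_env *):

#include <stdio.h>

static int bpf_stack_liveness_init(void)      { puts("init");   return 0; }
static int bpf_reset_stack_write_marks(int i) { printf("reset insn %d\n", i); return 0; }
static int bpf_commit_stack_write_marks(void) { puts("commit"); return 0; }
static int bpf_update_live_stack(void)        { puts("flush on exit/pop"); return 0; }
static void bpf_stack_liveness_free(void)     { puts("free"); }

int main(void)
{
	/* bpf_check(): set up liveness before the main verification pass. */
	if (bpf_stack_liveness_init())
		return 1;

	/* do_check(): reset/commit around each simulated insn. */
	for (int insn = 0; insn < 3; insn++) {
		if (bpf_reset_stack_write_marks(insn))
			return 1;
		/* do_check_insn() would run here */
		if (bpf_commit_stack_write_marks())
			return 1;
	}
	/* process_bpf_exit / pop_stack: fold this path's writes in. */
	if (bpf_update_live_stack())
		return 1;

	/* err_free_env: released with the other per-env allocations. */
	bpf_stack_liveness_free();
	return 0;
}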