Merge branch 'for-6.0-fixes' into for-6.1

for-6.0 has the following fix for cgroup_get_from_id().

  836ac87d ("cgroup: fix cgroup_get_from_id")

which conflicts with the following two commits in for-6.1.

  4534dee9 ("cgroup: cgroup: Honor caller's cgroup NS when resolving cgroup id")
  fa7e439c ("cgroup: Homogenize cgroup_get_from_id() return value")

While the resolution is straightforward, the code ends up pretty ugly
afterwards. Let's pull for-6.0-fixes into for-6.1 so that the code can be
fixed up there.

Signed-off-by: Tejun Heo <tj@kernel.org>
commit 026e14a276
Author: Tejun Heo <tj@kernel.org>
Date:   2022-09-23 07:19:38 -10:00

1032 files changed, 13399 insertions(+), 6955 deletions(-)
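
For context on the conflict described above: fa7e439c makes cgroup_get_from_id() report failure with ERR_PTR() instead of NULL. The snippet below is an illustrative caller sketch, not code from this merge; the function name and surrounding error handling are assumptions, and only cgroup_get_from_id(), cgroup_put(), and the IS_ERR()/PTR_ERR() convention come from the commits named above.

/* Illustrative caller only -- shows the ERR_PTR convention from fa7e439c. */
static int example_lookup_by_cgroup_id(u64 id)
{
	struct cgroup *cgrp;

	cgrp = cgroup_get_from_id(id);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);	/* e.g. -ENOENT when the id does not resolve */

	/* ... operate on cgrp ... */

	cgroup_put(cgrp);		/* drop the reference taken by cgroup_get_from_id() */
	return 0;
}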

@@ -102,6 +102,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0);
if (ret < 0) {
audit_mark->path = NULL;
fsnotify_put_mark(&audit_mark->mark);
audit_mark = ERR_PTR(ret);
}

@@ -1940,6 +1940,7 @@ void __audit_uring_exit(int success, long code)
goto out;
}
audit_return_fixup(ctx, success, code);
if (ctx->context == AUDIT_CTX_SYSCALL) {
/*
* NOTE: See the note in __audit_uring_entry() about the case
@@ -1981,7 +1982,6 @@ void __audit_uring_exit(int success, long code)
audit_filter_inodes(current, ctx);
if (ctx->current_state != AUDIT_STATE_RECORD)
goto out;
audit_return_fixup(ctx, success, code);
audit_log_exit();
out:
@@ -2065,13 +2065,13 @@ void __audit_syscall_exit(int success, long return_code)
if (!list_empty(&context->killed_trees))
audit_kill_trees(context);
audit_return_fixup(context, success, return_code);
/* run through both filters to ensure we set the filterkey properly */
audit_filter_syscall(current, context);
audit_filter_inodes(current, context);
if (context->current_state < AUDIT_STATE_RECORD)
goto out;
audit_return_fixup(context, success, return_code);
audit_log_exit();
out:

@@ -921,8 +921,10 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
pos++;
}
}
/* no link or prog match, skip the cgroup of this layer */
continue;
found:
BUG_ON(!cg);
progs = rcu_dereference_protected(
desc->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));

@@ -971,7 +971,7 @@ pure_initcall(bpf_jit_charge_init);
int bpf_jit_charge_modmem(u32 size)
{
if (atomic_long_add_return(size, &bpf_jit_current) > bpf_jit_limit) {
if (atomic_long_add_return(size, &bpf_jit_current) > READ_ONCE(bpf_jit_limit)) {
if (!bpf_capable()) {
atomic_long_sub(size, &bpf_jit_current);
return -EPERM;

@@ -24,7 +24,7 @@ void bpf_sk_reuseport_detach(struct sock *sk)
struct sock __rcu **socks;
write_lock_bh(&sk->sk_callback_lock);
socks = __rcu_dereference_sk_user_data_with_flags(sk, SK_USER_DATA_BPF);
socks = __locked_read_sk_user_data_with_flags(sk, SK_USER_DATA_BPF);
if (socks) {
WRITE_ONCE(sk->sk_user_data, NULL);
/*

@@ -5197,7 +5197,7 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_sys_bpf:
return &bpf_sys_bpf_proto;
return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto;
case BPF_FUNC_btf_find_by_name_kind:
return &bpf_btf_find_by_name_kind_proto;
case BPF_FUNC_sys_close:

@@ -6066,6 +6066,9 @@ skip_type_check:
return -EACCES;
}
meta->mem_size = reg->var_off.value;
err = mark_chain_precision(env, regno);
if (err)
return err;
break;
case ARG_PTR_TO_INT:
case ARG_PTR_TO_LONG:
@@ -7030,8 +7033,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
struct bpf_reg_state *regs = cur_regs(env), *reg;
struct bpf_map *map = meta->map_ptr;
struct tnum range;
u64 val;
u64 val, max;
int err;
if (func_id != BPF_FUNC_tail_call)
@@ -7041,10 +7043,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return -EINVAL;
}
range = tnum_range(0, map->max_entries - 1);
reg = &regs[BPF_REG_3];
val = reg->var_off.value;
max = map->max_entries;
if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
if (!(register_is_const(reg) && val < max)) {
bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
return 0;
}
@@ -7052,8 +7055,6 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
err = mark_chain_precision(env, BPF_REG_3);
if (err)
return err;
val = reg->var_off.value;
if (bpf_map_key_unseen(aux))
bpf_map_key_store(aux, val);
else if (!bpf_map_key_poisoned(aux) &&

@@ -6072,6 +6072,9 @@ struct cgroup *cgroup_get_from_id(u64 id)
if (!kn)
goto out;
if (kernfs_type(kn) != KERNFS_DIR)
goto put;
rcu_read_lock();
cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
@@ -6079,6 +6082,7 @@ struct cgroup *cgroup_get_from_id(u64 id)
cgrp = NULL;
rcu_read_unlock();
put:
kernfs_put(kn);
if (!cgrp)
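
The hunk above is the for-6.0 fix (836ac87d) mentioned in the merge description: a cgroup id resolves through kernfs, and the node it names can be a control file rather than a cgroup directory, so only KERNFS_DIR nodes may have their ->priv treated as a struct cgroup. Below is a minimal sketch of that guard; the helper name is hypothetical, the reference-pinning step and the ERR_PTR(-ENOENT) returns are assumed from fa7e439c's conventions, and none of it quotes the resolved function.

/* Hypothetical helper -- condenses the guard pattern visible in the hunk above. */
static struct cgroup *cgroup_from_kernfs_node(struct kernfs_node *kn)
{
	struct cgroup *cgrp = NULL;

	if (!kn)				/* the id did not resolve to a node */
		return ERR_PTR(-ENOENT);
	if (kernfs_type(kn) != KERNFS_DIR)	/* file nodes carry no cgroup in ->priv */
		goto put;

	rcu_read_lock();
	cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
	if (cgrp && !cgroup_tryget(cgrp))	/* assumed: pin the cgroup before returning it */
		cgrp = NULL;
	rcu_read_unlock();
put:
	kernfs_put(kn);				/* drop the lookup reference in all cases */
	return cgrp ?: ERR_PTR(-ENOENT);
}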

@@ -494,6 +494,7 @@ static int __init crash_save_vmcoreinfo_init(void)
#ifdef CONFIG_KALLSYMS
VMCOREINFO_SYMBOL(kallsyms_names);
VMCOREINFO_SYMBOL(kallsyms_num_syms);
VMCOREINFO_SYMBOL(kallsyms_token_table);
VMCOREINFO_SYMBOL(kallsyms_token_index);
#ifdef CONFIG_KALLSYMS_BASE_RELATIVE

@@ -1707,11 +1707,12 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
/* Try to disarm and disable this/parent probe */
if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
/*
* If 'kprobes_all_disarmed' is set, 'orig_p'
* should have already been disarmed, so
* skip unneed disarming process.
* Don't be lazy here. Even if 'kprobes_all_disarmed'
* is false, 'orig_p' might not have been armed yet.
* Note arm_all_kprobes() __tries__ to arm all kprobes
* on the best effort basis.
*/
if (!kprobes_all_disarmed) {
if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
ret = disarm_kprobe(orig_p, true);
if (ret) {
p->flags &= ~KPROBE_FLAG_DISABLED;

@@ -2099,7 +2099,7 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->static_call_sites),
&mod->num_static_call_sites);
#endif
#ifdef CONFIG_KUNIT
#if IS_ENABLED(CONFIG_KUNIT)
mod->kunit_suites = section_objs(info, ".kunit_test_suites",
sizeof(*mod->kunit_suites),
&mod->num_kunit_suites);

@@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_
prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
ret = (*action)(&wbq_entry->key, mode);
} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
} while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
finish_wait(wq_head, &wbq_entry->wq_entry);

@@ -277,6 +277,7 @@ COND_SYSCALL(landlock_restrict_self);
/* mm/fadvise.c */
COND_SYSCALL(fadvise64_64);
COND_SYSCALL_COMPAT(fadvise64_64);
/* mm/, CONFIG_MMU only */
COND_SYSCALL(swapon);

@@ -1861,8 +1861,6 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
ftrace_hash_rec_update_modify(ops, filter_hash, 1);
}
static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip);
/*
* Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
* or no-needed to update, -EBUSY if it detects a conflict of the flag
@@ -2974,6 +2972,16 @@ int ftrace_startup(struct ftrace_ops *ops, int command)
ftrace_startup_enable(command);
/*
* If ftrace is in an undefined state, we just remove ops from list
* to prevent the NULL pointer, instead of totally rolling it back and
* free trampoline, because those actions could cause further damage.
*/
if (unlikely(ftrace_disabled)) {
__unregister_ftrace_function(ops);
return -ENODEV;
}
ops->flags &= ~FTRACE_OPS_FL_ADDING;
return 0;
@@ -3108,49 +3116,6 @@ static inline int ops_traces_mod(struct ftrace_ops *ops)
ftrace_hash_empty(ops->func_hash->notrace_hash);
}
/*
* Check if the current ops references the given ip.
*
* If the ops traces all functions, then it was already accounted for.
* If the ops does not trace the current record function, skip it.
* If the ops ignores the function via notrace filter, skip it.
*/
static bool
ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
{
/* If ops isn't enabled, ignore it */
if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
return false;
/* If ops traces all then it includes this function */
if (ops_traces_mod(ops))
return true;
/* The function must be in the filter */
if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
!__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
return false;
/* If in notrace hash, we ignore it too */
if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
return false;
return true;
}
/*
* Check if the current ops references the record.
*
* If the ops traces all functions, then it was already accounted for.
* If the ops does not trace the current record function, skip it.
* If the ops ignores the function via notrace filter, skip it.
*/
static bool
ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
return ops_references_ip(ops, rec->ip);
}
static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
{
bool init_nop = ftrace_need_init_nop();
@@ -6812,6 +6777,38 @@ static int ftrace_get_trampoline_kallsym(unsigned int symnum,
return -ERANGE;
}
#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) || defined(CONFIG_MODULES)
/*
* Check if the current ops references the given ip.
*
* If the ops traces all functions, then it was already accounted for.
* If the ops does not trace the current record function, skip it.
* If the ops ignores the function via notrace filter, skip it.
*/
static bool
ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
{
/* If ops isn't enabled, ignore it */
if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
return false;
/* If ops traces all then it includes this function */
if (ops_traces_mod(ops))
return true;
/* The function must be in the filter */
if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
!__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
return false;
/* If in notrace hash, we ignore it too */
if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
return false;
return true;
}
#endif
#ifdef CONFIG_MODULES
#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
@@ -6824,7 +6821,7 @@ static int referenced_filters(struct dyn_ftrace *rec)
int cnt = 0;
for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
if (ops_references_rec(ops, rec)) {
if (ops_references_ip(ops, rec->ip)) {
if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_DIRECT))
continue;
if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_IPMODIFY))

@@ -227,6 +227,7 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
struct probe_arg *parg = &ep->tp.args[i];
struct ftrace_event_field *field;
struct list_head *head;
int ret = -ENOENT;
head = trace_get_fields(ep->event);
list_for_each_entry(field, head, link) {
@@ -236,9 +237,20 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
return 0;
}
}
/*
* Argument not found on event. But allow for comm and COMM
* to be used to get the current->comm.
*/
if (strcmp(parg->code->data, "COMM") == 0 ||
strcmp(parg->code->data, "comm") == 0) {
parg->code->op = FETCH_OP_COMM;
ret = 0;
}
kfree(parg->code->data);
parg->code->data = NULL;
return -ENOENT;
return ret;
}
static int eprobe_event_define_fields(struct trace_event_call *event_call)
@@ -311,6 +323,27 @@ static unsigned long get_event_field(struct fetch_insn *code, void *rec)
addr = rec + field->offset;
if (is_string_field(field)) {
switch (field->filter_type) {
case FILTER_DYN_STRING:
val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
break;
case FILTER_RDYN_STRING:
val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
break;
case FILTER_STATIC_STRING:
val = (unsigned long)addr;
break;
case FILTER_PTR_STRING:
val = (unsigned long)(*(char *)addr);
break;
default:
WARN_ON_ONCE(1);
return 0;
}
return val;
}
switch (field->size) {
case 1:
if (field->is_signed)
@@ -342,16 +375,38 @@ static unsigned long get_event_field(struct fetch_insn *code, void *rec)
static int get_eprobe_size(struct trace_probe *tp, void *rec)
{
struct fetch_insn *code;
struct probe_arg *arg;
int i, len, ret = 0;
for (i = 0; i < tp->nr_args; i++) {
arg = tp->args + i;
if (unlikely(arg->dynamic)) {
if (arg->dynamic) {
unsigned long val;
val = get_event_field(arg->code, rec);
len = process_fetch_insn_bottom(arg->code + 1, val, NULL, NULL);
code = arg->code;
retry:
switch (code->op) {
case FETCH_OP_TP_ARG:
val = get_event_field(code, rec);
break;
case FETCH_OP_IMM:
val = code->immediate;
break;
case FETCH_OP_COMM:
val = (unsigned long)current->comm;
break;
case FETCH_OP_DATA:
val = (unsigned long)code->data;
break;
case FETCH_NOP_SYMBOL: /* Ignore a place holder */
code++;
goto retry;
default:
continue;
}
code++;
len = process_fetch_insn_bottom(code, val, NULL, NULL);
if (len > 0)
ret += len;
}
@@ -369,8 +424,28 @@ process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
{
unsigned long val;
val = get_event_field(code, rec);
return process_fetch_insn_bottom(code + 1, val, dest, base);
retry:
switch (code->op) {
case FETCH_OP_TP_ARG:
val = get_event_field(code, rec);
break;
case FETCH_OP_IMM:
val = code->immediate;
break;
case FETCH_OP_COMM:
val = (unsigned long)current->comm;
break;
case FETCH_OP_DATA:
val = (unsigned long)code->data;
break;
case FETCH_NOP_SYMBOL: /* Ignore a place holder */
code++;
goto retry;
default:
return -EILSEQ;
}
code++;
return process_fetch_insn_bottom(code, val, dest, base);
}
NOKPROBE_SYMBOL(process_fetch_insn)
@@ -845,6 +920,10 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[
trace_probe_log_err(0, BAD_ATTACH_ARG);
}
/* Handle symbols "@" */
if (!ret)
ret = traceprobe_update_arg(&ep->tp.args[i]);
return ret;
}
@@ -883,7 +962,7 @@ static int __trace_eprobe_create(int argc, const char *argv[])
trace_probe_log_set_index(1);
sys_event = argv[1];
ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0);
if (!sys_event || !sys_name) {
if (ret || !sys_event || !sys_name) {
trace_probe_log_err(0, NO_EVENT_INFO);
goto parse_error;
}

@@ -157,7 +157,7 @@ static void perf_trace_event_unreg(struct perf_event *p_event)
int i;
if (--tp_event->perf_refcount > 0)
goto out;
return;
tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);
@@ -176,8 +176,6 @@ static void perf_trace_event_unreg(struct perf_event *p_event)
perf_trace_buf[i] = NULL;
}
}
out:
trace_event_put_ref(tp_event);
}
static int perf_trace_event_open(struct perf_event *p_event)
@@ -241,6 +239,7 @@ void perf_trace_destroy(struct perf_event *p_event)
mutex_lock(&event_mutex);
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
trace_event_put_ref(p_event->tp_event);
mutex_unlock(&event_mutex);
}
@@ -292,6 +291,7 @@ void perf_kprobe_destroy(struct perf_event *p_event)
mutex_lock(&event_mutex);
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
trace_event_put_ref(p_event->tp_event);
mutex_unlock(&event_mutex);
destroy_local_trace_kprobe(p_event->tp_event);
@@ -347,6 +347,7 @@ void perf_uprobe_destroy(struct perf_event *p_event)
mutex_lock(&event_mutex);
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
trace_event_put_ref(p_event->tp_event);
mutex_unlock(&event_mutex);
destroy_local_trace_uprobe(p_event->tp_event);
}

@@ -176,6 +176,7 @@ static int trace_define_generic_fields(void)
__generic_field(int, CPU, FILTER_CPU);
__generic_field(int, cpu, FILTER_CPU);
__generic_field(int, common_cpu, FILTER_CPU);
__generic_field(char *, COMM, FILTER_COMM);
__generic_field(char *, comm, FILTER_COMM);

@@ -283,7 +283,14 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
int ret = 0;
int len;
if (strcmp(arg, "retval") == 0) {
if (flags & TPARG_FL_TPOINT) {
if (code->data)
return -EFAULT;
code->data = kstrdup(arg, GFP_KERNEL);
if (!code->data)
return -ENOMEM;
code->op = FETCH_OP_TP_ARG;
} else if (strcmp(arg, "retval") == 0) {
if (flags & TPARG_FL_RETURN) {
code->op = FETCH_OP_RETVAL;
} else {
@@ -307,7 +314,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
}
} else
goto inval_var;
} else if (strcmp(arg, "comm") == 0) {
} else if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
code->op = FETCH_OP_COMM;
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
} else if (((flags & TPARG_FL_MASK) ==
@@ -323,13 +330,6 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
code->op = FETCH_OP_ARG;
code->param = (unsigned int)param - 1;
#endif
} else if (flags & TPARG_FL_TPOINT) {
if (code->data)
return -EFAULT;
code->data = kstrdup(arg, GFP_KERNEL);
if (!code->data)
return -ENOMEM;
code->op = FETCH_OP_TP_ARG;
} else
goto inval_var;
@@ -384,6 +384,11 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
break;
case '%': /* named register */
if (flags & TPARG_FL_TPOINT) {
/* eprobes do not handle registers */
trace_probe_log_err(offs, BAD_VAR);
break;
}
ret = regs_query_register_offset(arg + 1);
if (ret >= 0) {
code->op = FETCH_OP_REG;
@@ -617,9 +622,11 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
/*
* Since $comm and immediate string can not be dereferenced,
* we can find those by strcmp.
* we can find those by strcmp. But ignore for eprobes.
*/
if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) {
if (!(flags & TPARG_FL_TPOINT) &&
(strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 ||
strncmp(arg, "\\\"", 2) == 0)) {
/* The type of $comm must be "string", and not an array. */
if (parg->count || (t && strcmp(t, "string")))
goto out;