Merge branch 'for-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: - cgroup.kill is added which implements atomic killing of the whole subtree. Down the line, this should be able to replace the multiple userland implementations of "keep killing till empty". - PSI can now be turned off at boot time to avoid overhead for configurations which don't care about PSI. * 'for-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup: make per-cgroup pressure stall tracking configurable cgroup: Fix kernel-doc cgroup: inline cgroup_task_freeze() tests/cgroup: test cgroup.kill tests/cgroup: move cg_wait_for(), cg_prepare_for_wait() tests/cgroup: use cgroup.kill in cg_killall() docs/cgroup: add entry for cgroup.kill cgroup: introduce cgroup.kill
This commit is contained in:
commit
3dbdb38e28
13 changed files with 569 additions and 108 deletions
|
|
@ -209,6 +209,22 @@ struct cgroup_namespace init_cgroup_ns = {
|
|||
static struct file_system_type cgroup2_fs_type;
|
||||
static struct cftype cgroup_base_files[];
|
||||
|
||||
/* cgroup optional features */
|
||||
enum cgroup_opt_features {
|
||||
#ifdef CONFIG_PSI
|
||||
OPT_FEATURE_PRESSURE,
|
||||
#endif
|
||||
OPT_FEATURE_COUNT
|
||||
};
|
||||
|
||||
static const char *cgroup_opt_feature_names[OPT_FEATURE_COUNT] = {
|
||||
#ifdef CONFIG_PSI
|
||||
"pressure",
|
||||
#endif
|
||||
};
|
||||
|
||||
static u16 cgroup_feature_disable_mask __read_mostly;
|
||||
|
||||
static int cgroup_apply_control(struct cgroup *cgrp);
|
||||
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
|
||||
static void css_task_iter_skip(struct css_task_iter *it,
|
||||
|
|
@ -2390,7 +2406,7 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
|
|||
}
|
||||
|
||||
/**
|
||||
* cgroup_taskset_migrate - migrate a taskset
|
||||
* cgroup_migrate_execute - migrate a taskset
|
||||
* @mgctx: migration context
|
||||
*
|
||||
* Migrate tasks in @mgctx as setup by migration preparation functions.
|
||||
|
|
@ -3632,6 +3648,18 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
|
|||
{
|
||||
psi_trigger_replace(&of->priv, NULL);
|
||||
}
|
||||
|
||||
bool cgroup_psi_enabled(void)
|
||||
{
|
||||
return (cgroup_feature_disable_mask & (1 << OPT_FEATURE_PRESSURE)) == 0;
|
||||
}
|
||||
|
||||
#else /* CONFIG_PSI */
|
||||
bool cgroup_psi_enabled(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_PSI */
|
||||
|
||||
static int cgroup_freeze_show(struct seq_file *seq, void *v)
|
||||
|
|
@ -3668,6 +3696,80 @@ static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
|
|||
return nbytes;
|
||||
}
|
||||
|
||||
static void __cgroup_kill(struct cgroup *cgrp)
|
||||
{
|
||||
struct css_task_iter it;
|
||||
struct task_struct *task;
|
||||
|
||||
lockdep_assert_held(&cgroup_mutex);
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
set_bit(CGRP_KILL, &cgrp->flags);
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
||||
css_task_iter_start(&cgrp->self, CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED, &it);
|
||||
while ((task = css_task_iter_next(&it))) {
|
||||
/* Ignore kernel threads here. */
|
||||
if (task->flags & PF_KTHREAD)
|
||||
continue;
|
||||
|
||||
/* Skip tasks that are already dying. */
|
||||
if (__fatal_signal_pending(task))
|
||||
continue;
|
||||
|
||||
send_sig(SIGKILL, task, 0);
|
||||
}
|
||||
css_task_iter_end(&it);
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
clear_bit(CGRP_KILL, &cgrp->flags);
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
}
|
||||
|
||||
static void cgroup_kill(struct cgroup *cgrp)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
struct cgroup *dsct;
|
||||
|
||||
lockdep_assert_held(&cgroup_mutex);
|
||||
|
||||
cgroup_for_each_live_descendant_pre(dsct, css, cgrp)
|
||||
__cgroup_kill(dsct);
|
||||
}
|
||||
|
||||
static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
ssize_t ret = 0;
|
||||
int kill;
|
||||
struct cgroup *cgrp;
|
||||
|
||||
ret = kstrtoint(strstrip(buf), 0, &kill);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (kill != 1)
|
||||
return -ERANGE;
|
||||
|
||||
cgrp = cgroup_kn_lock_live(of->kn, false);
|
||||
if (!cgrp)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* Killing is a process directed operation, i.e. the whole thread-group
|
||||
* is taken down so act like we do for cgroup.procs and only make this
|
||||
* writable in non-threaded cgroups.
|
||||
*/
|
||||
if (cgroup_is_threaded(cgrp))
|
||||
ret = -EOPNOTSUPP;
|
||||
else
|
||||
cgroup_kill(cgrp);
|
||||
|
||||
cgroup_kn_unlock(of->kn);
|
||||
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
static int cgroup_file_open(struct kernfs_open_file *of)
|
||||
{
|
||||
struct cftype *cft = of_cft(of);
|
||||
|
|
@ -3882,6 +3984,8 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
|
|||
restart:
|
||||
for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
|
||||
/* does cft->flags tell us to skip this file on @cgrp? */
|
||||
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
|
||||
continue;
|
||||
if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
|
||||
continue;
|
||||
if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
|
||||
|
|
@ -3959,6 +4063,9 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
|
|||
|
||||
WARN_ON(cft->ss || cft->kf_ops);
|
||||
|
||||
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
|
||||
continue;
|
||||
|
||||
if (cft->seq_start)
|
||||
kf_ops = &cgroup_kf_ops;
|
||||
else
|
||||
|
|
@ -4860,6 +4967,11 @@ static struct cftype cgroup_base_files[] = {
|
|||
.seq_show = cgroup_freeze_show,
|
||||
.write = cgroup_freeze_write,
|
||||
},
|
||||
{
|
||||
.name = "cgroup.kill",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.write = cgroup_kill_write,
|
||||
},
|
||||
{
|
||||
.name = "cpu.stat",
|
||||
.seq_show = cpu_stat_show,
|
||||
|
|
@ -4867,6 +4979,7 @@ static struct cftype cgroup_base_files[] = {
|
|||
#ifdef CONFIG_PSI
|
||||
{
|
||||
.name = "io.pressure",
|
||||
.flags = CFTYPE_PRESSURE,
|
||||
.seq_show = cgroup_io_pressure_show,
|
||||
.write = cgroup_io_pressure_write,
|
||||
.poll = cgroup_pressure_poll,
|
||||
|
|
@ -4874,6 +4987,7 @@ static struct cftype cgroup_base_files[] = {
|
|||
},
|
||||
{
|
||||
.name = "memory.pressure",
|
||||
.flags = CFTYPE_PRESSURE,
|
||||
.seq_show = cgroup_memory_pressure_show,
|
||||
.write = cgroup_memory_pressure_write,
|
||||
.poll = cgroup_pressure_poll,
|
||||
|
|
@ -4881,6 +4995,7 @@ static struct cftype cgroup_base_files[] = {
|
|||
},
|
||||
{
|
||||
.name = "cpu.pressure",
|
||||
.flags = CFTYPE_PRESSURE,
|
||||
.seq_show = cgroup_cpu_pressure_show,
|
||||
.write = cgroup_cpu_pressure_write,
|
||||
.poll = cgroup_pressure_poll,
|
||||
|
|
@ -6080,6 +6195,8 @@ void cgroup_post_fork(struct task_struct *child,
|
|||
struct kernel_clone_args *kargs)
|
||||
__releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex)
|
||||
{
|
||||
unsigned long cgrp_flags = 0;
|
||||
bool kill = false;
|
||||
struct cgroup_subsys *ss;
|
||||
struct css_set *cset;
|
||||
int i;
|
||||
|
|
@ -6091,6 +6208,11 @@ void cgroup_post_fork(struct task_struct *child,
|
|||
|
||||
/* init tasks are special, only link regular threads */
|
||||
if (likely(child->pid)) {
|
||||
if (kargs->cgrp)
|
||||
cgrp_flags = kargs->cgrp->flags;
|
||||
else
|
||||
cgrp_flags = cset->dfl_cgrp->flags;
|
||||
|
||||
WARN_ON_ONCE(!list_empty(&child->cg_list));
|
||||
cset->nr_tasks++;
|
||||
css_set_move_task(child, NULL, cset, false);
|
||||
|
|
@ -6099,23 +6221,32 @@ void cgroup_post_fork(struct task_struct *child,
|
|||
cset = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the cgroup has to be frozen, the new task has too. Let's set
|
||||
* the JOBCTL_TRAP_FREEZE jobctl bit to get the task into the
|
||||
* frozen state.
|
||||
*/
|
||||
if (unlikely(cgroup_task_freeze(child))) {
|
||||
spin_lock(&child->sighand->siglock);
|
||||
WARN_ON_ONCE(child->frozen);
|
||||
child->jobctl |= JOBCTL_TRAP_FREEZE;
|
||||
spin_unlock(&child->sighand->siglock);
|
||||
if (!(child->flags & PF_KTHREAD)) {
|
||||
if (unlikely(test_bit(CGRP_FREEZE, &cgrp_flags))) {
|
||||
/*
|
||||
* If the cgroup has to be frozen, the new task has
|
||||
* too. Let's set the JOBCTL_TRAP_FREEZE jobctl bit to
|
||||
* get the task into the frozen state.
|
||||
*/
|
||||
spin_lock(&child->sighand->siglock);
|
||||
WARN_ON_ONCE(child->frozen);
|
||||
child->jobctl |= JOBCTL_TRAP_FREEZE;
|
||||
spin_unlock(&child->sighand->siglock);
|
||||
|
||||
/*
|
||||
* Calling cgroup_update_frozen() isn't required here,
|
||||
* because it will be called anyway a bit later from
|
||||
* do_freezer_trap(). So we avoid cgroup's transient
|
||||
* switch from the frozen state and back.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Calling cgroup_update_frozen() isn't required here,
|
||||
* because it will be called anyway a bit later from
|
||||
* do_freezer_trap(). So we avoid cgroup's transient switch
|
||||
* from the frozen state and back.
|
||||
* If the cgroup is to be killed notice it now and take the
|
||||
* child down right after we finished preparing it for
|
||||
* userspace.
|
||||
*/
|
||||
kill = test_bit(CGRP_KILL, &cgrp_flags);
|
||||
}
|
||||
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
|
@ -6138,6 +6269,10 @@ void cgroup_post_fork(struct task_struct *child,
|
|||
put_css_set(rcset);
|
||||
}
|
||||
|
||||
/* Cgroup has to be killed so take down child immediately. */
|
||||
if (unlikely(kill))
|
||||
do_send_sig_info(SIGKILL, SEND_SIG_NOINFO, child, PIDTYPE_TGID);
|
||||
|
||||
cgroup_css_set_put_fork(kargs);
|
||||
}
|
||||
|
||||
|
|
@ -6163,7 +6298,8 @@ void cgroup_exit(struct task_struct *tsk)
|
|||
cset->nr_tasks--;
|
||||
|
||||
WARN_ON_ONCE(cgroup_task_frozen(tsk));
|
||||
if (unlikely(cgroup_task_freeze(tsk)))
|
||||
if (unlikely(!(tsk->flags & PF_KTHREAD) &&
|
||||
test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
|
||||
cgroup_update_frozen(task_dfl_cgroup(tsk));
|
||||
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
|
@ -6214,6 +6350,15 @@ static int __init cgroup_disable(char *str)
|
|||
pr_info("Disabling %s control group subsystem\n",
|
||||
ss->name);
|
||||
}
|
||||
|
||||
for (i = 0; i < OPT_FEATURE_COUNT; i++) {
|
||||
if (strcmp(token, cgroup_opt_feature_names[i]))
|
||||
continue;
|
||||
cgroup_feature_disable_mask |= 1 << i;
|
||||
pr_info("Disabling %s control group feature\n",
|
||||
cgroup_opt_feature_names[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
|
@ -6512,6 +6657,9 @@ static ssize_t show_delegatable_files(struct cftype *files, char *buf,
|
|||
if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
|
||||
continue;
|
||||
|
||||
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
|
||||
continue;
|
||||
|
||||
if (prefix)
|
||||
ret += snprintf(buf + ret, size - ret, "%s.", prefix);
|
||||
|
||||
|
|
|
|||
|
|
@ -220,7 +220,7 @@ void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp)
|
|||
}
|
||||
|
||||
/**
|
||||
* cgroup_rstat_flush_begin - flush stats in @cgrp's subtree and hold
|
||||
* cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
|
||||
* @cgrp: target cgroup
|
||||
*
|
||||
* Flush stats in @cgrp's subtree and prevent further flushes. Must be
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue