PSI updates for v6.1:
- Various performance optimizations, resulting in a 4%-9% speedup
in the mmtests/config-scheduler-perfpipe micro-benchmark.
- New interface to turn PSI on/off on a per cgroup level.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-----BEGIN PGP SIGNATURE-----
iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmNJKPsRHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1iPmg//aovCitAQX2lLoHJDIgdQibU40oaEpKTX
wM549EGz3Dr6qmwF8+qT1U2Ge6af/hHQc5G/ZqDpKbuTjUIc3RmBkqX80dNKFLuH
uyi9UtfsSriw+ks8fWuDdjr+S4oppwW9ZoIXvK8v4bisd3F31DNGvKPTayNxt73m
lExfzJiD1oJixDxGX8MGO9QpcoywmjWjzjrB2P+J8hnTpArouHx/HOKdQOpG6wXq
ZRr9kZvju6ucDpXCTa1HJrfVRxNAh35tx/b4cDtXbBFifVAeKaPOrHapMTVsqfel
Z7T+2DymhidNYK0hrRJoGUwa/vkz+2Sm1ZLG9LlgUCXVco/9S1zw1ZuQakVvzPen
wriuxRaAkR+szCP0L8js5+/DAkGa43MjKsvQHmDVnetQtlsAD4eYnn+alQ837SXv
MP3jwFqF+e4mcWdoQcfh0OWUgGec5XZzdgRYrFkBKyTWGLB2iPivcAMNf0X/h82Q
xxv4DQJIIJ017GOQ/ho2saq+GbtFCvX8YnGYas9T47Bjjluhjo7jgTVtPTo+mhtN
RfwMdG718Ap/gvnAX7wMe/t+L/4AP8AIgDRi5L35dTRqETwOjH+LAvOYjleQFYgu
kMVtLMyzU+TGwHscuzPFRh7TnvSJ4sD48Ll1BPnyZsh3SS9u0gAs1bml7Cu7JbmW
SIZD/S/hzdI=
=91tB
-----END PGP SIGNATURE-----
Merge tag 'sched-psi-2022-10-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull PSI updates from Ingo Molnar:
- Various performance optimizations, resulting in a 4%-9% speedup in
the mmtests/config-scheduler-perfpipe micro-benchmark.
- New interface to turn PSI on/off on a per-cgroup level.
* tag 'sched-psi-2022-10-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/psi: Per-cgroup PSI accounting disable/re-enable interface
sched/psi: Cache parent psi_group to speed up group iteration
sched/psi: Consolidate cgroup_psi()
sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure
sched/psi: Remove NR_ONCPU task accounting
sched/psi: Optimize task switch inside shared cgroups again
sched/psi: Move private helpers to sched/stats.h
sched/psi: Save percpu memory when !psi_cgroups_enabled
sched/psi: Don't create cgroup PSI files when psi_disabled
sched/psi: Fix periodic aggregation shut off
This commit is contained in:
commit
bd9a3dba18
9 changed files with 362 additions and 103 deletions
|
|
@ -428,6 +428,9 @@ struct cgroup {
|
|||
struct cgroup_file procs_file; /* handle for "cgroup.procs" */
|
||||
struct cgroup_file events_file; /* handle for "cgroup.events" */
|
||||
|
||||
/* handles for "{cpu,memory,io,irq}.pressure" */
|
||||
struct cgroup_file psi_files[NR_PSI_RESOURCES];
|
||||
|
||||
/*
|
||||
* The bitmask of subsystems enabled on the child cgroups.
|
||||
* ->subtree_control is the one configured through
|
||||
|
|
|
|||
|
|
@ -682,11 +682,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
|
|||
pr_cont_kernfs_path(cgrp->kn);
|
||||
}
|
||||
|
||||
static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
|
||||
{
|
||||
return cgrp->psi;
|
||||
}
|
||||
|
||||
bool cgroup_psi_enabled(void);
|
||||
|
||||
static inline void cgroup_init_kthreadd(void)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include <linux/sched.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/cgroup-defs.h>
|
||||
#include <linux/cgroup.h>
|
||||
|
||||
struct seq_file;
|
||||
struct css_set;
|
||||
|
|
@ -18,10 +19,6 @@ extern struct psi_group psi_system;
|
|||
|
||||
void psi_init(void);
|
||||
|
||||
void psi_task_change(struct task_struct *task, int clear, int set);
|
||||
void psi_task_switch(struct task_struct *prev, struct task_struct *next,
|
||||
bool sleep);
|
||||
|
||||
void psi_memstall_enter(unsigned long *flags);
|
||||
void psi_memstall_leave(unsigned long *flags);
|
||||
|
||||
|
|
@ -34,9 +31,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
|
|||
poll_table *wait);
|
||||
|
||||
#ifdef CONFIG_CGROUPS
|
||||
static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
|
||||
{
|
||||
return cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
|
||||
}
|
||||
|
||||
int psi_cgroup_alloc(struct cgroup *cgrp);
|
||||
void psi_cgroup_free(struct cgroup *cgrp);
|
||||
void cgroup_move_task(struct task_struct *p, struct css_set *to);
|
||||
void psi_cgroup_restart(struct psi_group *group);
|
||||
#endif
|
||||
|
||||
#else /* CONFIG_PSI */
|
||||
|
|
@ -58,6 +61,7 @@ static inline void cgroup_move_task(struct task_struct *p, struct css_set *to)
|
|||
{
|
||||
rcu_assign_pointer(p->cgroups, to);
|
||||
}
|
||||
static inline void psi_cgroup_restart(struct psi_group *group) {}
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_PSI */
|
||||
|
|
|
|||
|
|
@ -15,13 +15,6 @@ enum psi_task_count {
|
|||
NR_IOWAIT,
|
||||
NR_MEMSTALL,
|
||||
NR_RUNNING,
|
||||
/*
|
||||
* This can't have values other than 0 or 1 and could be
|
||||
* implemented as a bit flag. But for now we still have room
|
||||
* in the first cacheline of psi_group_cpu, and this way we
|
||||
* don't have to special case any state tracking for it.
|
||||
*/
|
||||
NR_ONCPU,
|
||||
/*
|
||||
* For IO and CPU stalls the presence of running/oncpu tasks
|
||||
* in the domain means a partial rather than a full stall.
|
||||
|
|
@ -32,22 +25,27 @@ enum psi_task_count {
|
|||
* threads and memstall ones.
|
||||
*/
|
||||
NR_MEMSTALL_RUNNING,
|
||||
NR_PSI_TASK_COUNTS = 5,
|
||||
NR_PSI_TASK_COUNTS = 4,
|
||||
};
|
||||
|
||||
/* Task state bitmasks */
|
||||
#define TSK_IOWAIT (1 << NR_IOWAIT)
|
||||
#define TSK_MEMSTALL (1 << NR_MEMSTALL)
|
||||
#define TSK_RUNNING (1 << NR_RUNNING)
|
||||
#define TSK_ONCPU (1 << NR_ONCPU)
|
||||
#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
|
||||
|
||||
/* Only one task can be scheduled, no corresponding task count */
|
||||
#define TSK_ONCPU (1 << NR_PSI_TASK_COUNTS)
|
||||
|
||||
/* Resources that workloads could be stalled on */
|
||||
enum psi_res {
|
||||
PSI_IO,
|
||||
PSI_MEM,
|
||||
PSI_CPU,
|
||||
NR_PSI_RESOURCES = 3,
|
||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||
PSI_IRQ,
|
||||
#endif
|
||||
NR_PSI_RESOURCES,
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -63,11 +61,17 @@ enum psi_states {
|
|||
PSI_MEM_FULL,
|
||||
PSI_CPU_SOME,
|
||||
PSI_CPU_FULL,
|
||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||
PSI_IRQ_FULL,
|
||||
#endif
|
||||
/* Only per-CPU, to weigh the CPU in the global average: */
|
||||
PSI_NONIDLE,
|
||||
NR_PSI_STATES = 7,
|
||||
NR_PSI_STATES,
|
||||
};
|
||||
|
||||
/* Use one bit in the state mask to track TSK_ONCPU */
|
||||
#define PSI_ONCPU (1 << NR_PSI_STATES)
|
||||
|
||||
enum psi_aggregators {
|
||||
PSI_AVGS = 0,
|
||||
PSI_POLL,
|
||||
|
|
@ -147,6 +151,9 @@ struct psi_trigger {
|
|||
};
|
||||
|
||||
struct psi_group {
|
||||
struct psi_group *parent;
|
||||
bool enabled;
|
||||
|
||||
/* Protects data used by the aggregator */
|
||||
struct mutex avgs_lock;
|
||||
|
||||
|
|
@ -188,6 +195,8 @@ struct psi_group {
|
|||
|
||||
#else /* CONFIG_PSI */
|
||||
|
||||
#define NR_PSI_RESOURCES 0
|
||||
|
||||
struct psi_group { };
|
||||
|
||||
#endif /* CONFIG_PSI */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue