Scheduler updates for this cycle are:
- Clean up SCHED_DEBUG: move the decades old mess of sysctl, procfs and debugfs interfaces
to a unified debugfs interface.
- Signals: Allow caching one sigqueue object per task, to improve performance & latencies.
- Improve newidle_balance() irq-off latencies on systems with a large number of CPU cgroups.
- Improve energy-aware scheduling
- Improve the PELT metrics for certain workloads
- Reintroduce select_idle_smt() to improve load-balancing locality - but without the previous
regressions
- Add 'scheduler latency debugging': warn after long periods of pending need_resched. This
is an opt-in feature that requires the enabling of the LATENCY_WARN scheduler feature,
or the use of the resched_latency_warn_ms=xx boot parameter.
- CPU hotplug fixes for HP-rollback, and for the 'fail' interface. Fix remaining
balance_push() vs. hotplug holes/races
- PSI fixes, plus allow /proc/pressure/ files to be written by CAP_SYS_RESOURCE tasks as well
- Fix/improve various load-balancing corner cases vs. capacity margins
- Fix sched topology on systems with NUMA diameter of 3 or above
- Fix PF_KTHREAD vs to_kthread() race
- Minor rseq optimizations
- Misc cleanups, optimizations, fixes and smaller updates
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-----BEGIN PGP SIGNATURE-----
iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmCJInsRHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1i5XxAArh0b+fwXlkVGzTUly7HQjhU7lFbChnmF
h6ToyNLi6pXoZ14VC/WoRIME+RzK3gmw9cEFaSLVPxbkbekTcyWS78kqmcg1/j2v
kO/20QhXobiIxVskYfoMmqSavZ5mKhMWBqtFXkCuYfxwGylas0VVdh3AZLJ7N21G
WEoFh99pVULwWnPHxM2ZQ87Ex9BkGKbsBTswxWpprCfXLqD0N2hHlABpwJP78zRf
VniWFOcC7lslILCFawb7CqGgAwbgV85nDRS4QCuCKisrkFywvjJrEeu/W+h1NfhF
d6ves/osNdEAM1DSALoxwEA42An8l8xh8NyJnl8JZV00LW0DM108O5/7pf5Zcryc
RHV3RxA7skgezBh5uThvo60QzNK+kVMatI4qpQEHxLE52CaDl/fBu1Cgb/VUxnIl
AEBfyiFbk+skHpuMFKtl30Tx3M+yJKMTzFPd4kYjHYGEDwtAcXcB3dJQW48A79i3
H3IWcDcXpk5Rjo2UZmaXdt/qlj7mP6U0xdOUq8ZK6JOC4uY9skszVGsfuNN9QQ5u
2E2YKKVrGFoQydl4C8R6A7axL2VzIJszHFZNipd8E3YOyW7PWRAkr02tOOkBTj8N
dLMcNM7aPJWqEYiEIjEzGQN20pweJ1dRA29LDuOswKh+7W2bWTQFh6F2Q8Haansc
RVg5PDzl+Mc=
=E7mz
-----END PGP SIGNATURE-----
Merge tag 'sched-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
- Clean up SCHED_DEBUG: move the decades old mess of sysctl, procfs and
debugfs interfaces to a unified debugfs interface.
- Signals: Allow caching one sigqueue object per task, to improve
performance & latencies.
- Improve newidle_balance() irq-off latencies on systems with a large
number of CPU cgroups.
- Improve energy-aware scheduling
- Improve the PELT metrics for certain workloads
- Reintroduce select_idle_smt() to improve load-balancing locality -
but without the previous regressions
- Add 'scheduler latency debugging': warn after long periods of pending
need_resched. This is an opt-in feature that requires the enabling of
the LATENCY_WARN scheduler feature, or the use of the
resched_latency_warn_ms=xx boot parameter.
- CPU hotplug fixes for HP-rollback, and for the 'fail' interface. Fix
remaining balance_push() vs. hotplug holes/races
- PSI fixes, plus allow /proc/pressure/ files to be written by
CAP_SYS_RESOURCE tasks as well
- Fix/improve various load-balancing corner cases vs. capacity margins
- Fix sched topology on systems with NUMA diameter of 3 or above
- Fix PF_KTHREAD vs to_kthread() race
- Minor rseq optimizations
- Misc cleanups, optimizations, fixes and smaller updates
* tag 'sched-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (61 commits)
cpumask/hotplug: Fix cpu_dying() state tracking
kthread: Fix PF_KTHREAD vs to_kthread() race
sched/debug: Fix cgroup_path[] serialization
sched,psi: Handle potential task count underflow bugs more gracefully
sched: Warn on long periods of pending need_resched
sched/fair: Move update_nohz_stats() to the CONFIG_NO_HZ_COMMON block to simplify the code & fix an unused function warning
sched/debug: Rename the sched_debug parameter to sched_verbose
sched,fair: Alternative sched_slice()
sched: Move /proc/sched_debug to debugfs
sched,debug: Convert sysctl sched_domains to debugfs
debugfs: Implement debugfs_create_str()
sched,preempt: Move preempt_dynamic to debug.c
sched: Move SCHED_DEBUG sysctl to debugfs
sched: Don't make LATENCYTOP select SCHED_DEBUG
sched: Remove sched_schedstats sysctl out from under SCHED_DEBUG
sched/numa: Allow runtime enabling/disabling of NUMA balance without SCHED_DEBUG
sched: Use cpu_dying() to fix balance_push vs hotplug-rollback
cpumask: Introduce DYING mask
cpumask: Make cpu_{online,possible,present,active}() inline
rseq: Optimise rseq_get_rseq_cs() and clear_rseq_cs()
...
This commit is contained in:
commit
16b3d0cf5b
46 changed files with 1305 additions and 921 deletions
|
|
@ -91,44 +91,15 @@ extern struct cpumask __cpu_possible_mask;
|
|||
extern struct cpumask __cpu_online_mask;
|
||||
extern struct cpumask __cpu_present_mask;
|
||||
extern struct cpumask __cpu_active_mask;
|
||||
extern struct cpumask __cpu_dying_mask;
|
||||
#define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
|
||||
#define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask)
|
||||
#define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask)
|
||||
#define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask)
|
||||
#define cpu_dying_mask ((const struct cpumask *)&__cpu_dying_mask)
|
||||
|
||||
extern atomic_t __num_online_cpus;
|
||||
|
||||
#if NR_CPUS > 1
|
||||
/**
|
||||
* num_online_cpus() - Read the number of online CPUs
|
||||
*
|
||||
* Despite the fact that __num_online_cpus is of type atomic_t, this
|
||||
* interface gives only a momentary snapshot and is not protected against
|
||||
* concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
|
||||
* region.
|
||||
*/
|
||||
static inline unsigned int num_online_cpus(void)
|
||||
{
|
||||
return atomic_read(&__num_online_cpus);
|
||||
}
|
||||
#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
|
||||
#define num_present_cpus() cpumask_weight(cpu_present_mask)
|
||||
#define num_active_cpus() cpumask_weight(cpu_active_mask)
|
||||
#define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask)
|
||||
#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask)
|
||||
#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask)
|
||||
#define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask)
|
||||
#else
|
||||
#define num_online_cpus() 1U
|
||||
#define num_possible_cpus() 1U
|
||||
#define num_present_cpus() 1U
|
||||
#define num_active_cpus() 1U
|
||||
#define cpu_online(cpu) ((cpu) == 0)
|
||||
#define cpu_possible(cpu) ((cpu) == 0)
|
||||
#define cpu_present(cpu) ((cpu) == 0)
|
||||
#define cpu_active(cpu) ((cpu) == 0)
|
||||
#endif
|
||||
|
||||
extern cpumask_t cpus_booted_once_mask;
|
||||
|
||||
static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
|
||||
|
|
@ -857,6 +828,14 @@ set_cpu_active(unsigned int cpu, bool active)
|
|||
cpumask_clear_cpu(cpu, &__cpu_active_mask);
|
||||
}
|
||||
|
||||
static inline void
|
||||
set_cpu_dying(unsigned int cpu, bool dying)
|
||||
{
|
||||
if (dying)
|
||||
cpumask_set_cpu(cpu, &__cpu_dying_mask);
|
||||
else
|
||||
cpumask_clear_cpu(cpu, &__cpu_dying_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
|
||||
|
|
@ -894,6 +873,82 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
|
|||
return to_cpumask(p);
|
||||
}
|
||||
|
||||
#if NR_CPUS > 1
|
||||
/**
|
||||
* num_online_cpus() - Read the number of online CPUs
|
||||
*
|
||||
* Despite the fact that __num_online_cpus is of type atomic_t, this
|
||||
* interface gives only a momentary snapshot and is not protected against
|
||||
* concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
|
||||
* region.
|
||||
*/
|
||||
static inline unsigned int num_online_cpus(void)
|
||||
{
|
||||
return atomic_read(&__num_online_cpus);
|
||||
}
|
||||
#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
|
||||
#define num_present_cpus() cpumask_weight(cpu_present_mask)
|
||||
#define num_active_cpus() cpumask_weight(cpu_active_mask)
|
||||
|
||||
static inline bool cpu_online(unsigned int cpu)
|
||||
{
|
||||
return cpumask_test_cpu(cpu, cpu_online_mask);
|
||||
}
|
||||
|
||||
static inline bool cpu_possible(unsigned int cpu)
|
||||
{
|
||||
return cpumask_test_cpu(cpu, cpu_possible_mask);
|
||||
}
|
||||
|
||||
static inline bool cpu_present(unsigned int cpu)
|
||||
{
|
||||
return cpumask_test_cpu(cpu, cpu_present_mask);
|
||||
}
|
||||
|
||||
static inline bool cpu_active(unsigned int cpu)
|
||||
{
|
||||
return cpumask_test_cpu(cpu, cpu_active_mask);
|
||||
}
|
||||
|
||||
static inline bool cpu_dying(unsigned int cpu)
|
||||
{
|
||||
return cpumask_test_cpu(cpu, cpu_dying_mask);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define num_online_cpus() 1U
|
||||
#define num_possible_cpus() 1U
|
||||
#define num_present_cpus() 1U
|
||||
#define num_active_cpus() 1U
|
||||
|
||||
static inline bool cpu_online(unsigned int cpu)
|
||||
{
|
||||
return cpu == 0;
|
||||
}
|
||||
|
||||
static inline bool cpu_possible(unsigned int cpu)
|
||||
{
|
||||
return cpu == 0;
|
||||
}
|
||||
|
||||
static inline bool cpu_present(unsigned int cpu)
|
||||
{
|
||||
return cpu == 0;
|
||||
}
|
||||
|
||||
static inline bool cpu_active(unsigned int cpu)
|
||||
{
|
||||
return cpu == 0;
|
||||
}
|
||||
|
||||
static inline bool cpu_dying(unsigned int cpu)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* NR_CPUS > 1 */
|
||||
|
||||
#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
|
||||
|
||||
#if NR_CPUS <= BITS_PER_LONG
|
||||
|
|
|
|||
|
|
@ -128,6 +128,8 @@ void debugfs_create_atomic_t(const char *name, umode_t mode,
|
|||
struct dentry *parent, atomic_t *value);
|
||||
struct dentry *debugfs_create_bool(const char *name, umode_t mode,
|
||||
struct dentry *parent, bool *value);
|
||||
void debugfs_create_str(const char *name, umode_t mode,
|
||||
struct dentry *parent, char **value);
|
||||
|
||||
struct dentry *debugfs_create_blob(const char *name, umode_t mode,
|
||||
struct dentry *parent,
|
||||
|
|
@ -156,6 +158,9 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf,
|
|||
ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
|
||||
size_t count, loff_t *ppos);
|
||||
|
||||
ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
|
||||
size_t count, loff_t *ppos);
|
||||
|
||||
#else
|
||||
|
||||
#include <linux/err.h>
|
||||
|
|
@ -297,6 +302,11 @@ static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode,
|
|||
return ERR_PTR(-ENODEV);
|
||||
}
|
||||
|
||||
static inline void debugfs_create_str(const char *name, umode_t mode,
|
||||
struct dentry *parent,
|
||||
char **value)
|
||||
{ }
|
||||
|
||||
static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode,
|
||||
struct dentry *parent,
|
||||
struct debugfs_blob_wrapper *blob)
|
||||
|
|
@ -348,6 +358,13 @@ static inline ssize_t debugfs_write_file_bool(struct file *file,
|
|||
return -ENODEV;
|
||||
}
|
||||
|
||||
static inline ssize_t debugfs_read_file_str(struct file *file,
|
||||
char __user *user_buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#ifndef _LINUX_KCOV_H
|
||||
#define _LINUX_KCOV_H
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <uapi/linux/kcov.h>
|
||||
|
||||
struct task_struct;
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ void psi_task_change(struct task_struct *task, int clear, int set);
|
|||
void psi_task_switch(struct task_struct *prev, struct task_struct *next,
|
||||
bool sleep);
|
||||
|
||||
void psi_memstall_tick(struct task_struct *task, int cpu);
|
||||
void psi_memstall_enter(unsigned long *flags);
|
||||
void psi_memstall_leave(unsigned long *flags);
|
||||
|
||||
|
|
|
|||
|
|
@ -50,9 +50,10 @@ enum psi_states {
|
|||
PSI_MEM_SOME,
|
||||
PSI_MEM_FULL,
|
||||
PSI_CPU_SOME,
|
||||
PSI_CPU_FULL,
|
||||
/* Only per-CPU, to weigh the CPU in the global average: */
|
||||
PSI_NONIDLE,
|
||||
NR_PSI_STATES = 6,
|
||||
NR_PSI_STATES = 7,
|
||||
};
|
||||
|
||||
enum psi_aggregators {
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
#include <linux/pid.h>
|
||||
#include <linux/sem.h>
|
||||
#include <linux/shm.h>
|
||||
#include <linux/kcov.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/plist.h>
|
||||
#include <linux/hrtimer.h>
|
||||
|
|
@ -985,6 +984,7 @@ struct task_struct {
|
|||
/* Signal handlers: */
|
||||
struct signal_struct *signal;
|
||||
struct sighand_struct __rcu *sighand;
|
||||
struct sigqueue *sigqueue_cache;
|
||||
sigset_t blocked;
|
||||
sigset_t real_blocked;
|
||||
/* Restored if set_restore_sigmask() was used: */
|
||||
|
|
@ -1101,7 +1101,7 @@ struct task_struct {
|
|||
#ifdef CONFIG_CPUSETS
|
||||
/* Protected by ->alloc_lock: */
|
||||
nodemask_t mems_allowed;
|
||||
/* Seqence number to catch updates: */
|
||||
/* Sequence number to catch updates: */
|
||||
seqcount_spinlock_t mems_allowed_seq;
|
||||
int cpuset_mem_spread_rotor;
|
||||
int cpuset_slab_spread_rotor;
|
||||
|
|
|
|||
|
|
@ -26,10 +26,11 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
|
|||
enum { sysctl_hung_task_timeout_secs = 0 };
|
||||
#endif
|
||||
|
||||
extern unsigned int sysctl_sched_child_runs_first;
|
||||
|
||||
extern unsigned int sysctl_sched_latency;
|
||||
extern unsigned int sysctl_sched_min_granularity;
|
||||
extern unsigned int sysctl_sched_wakeup_granularity;
|
||||
extern unsigned int sysctl_sched_child_runs_first;
|
||||
|
||||
enum sched_tunable_scaling {
|
||||
SCHED_TUNABLESCALING_NONE,
|
||||
|
|
@ -37,7 +38,7 @@ enum sched_tunable_scaling {
|
|||
SCHED_TUNABLESCALING_LINEAR,
|
||||
SCHED_TUNABLESCALING_END,
|
||||
};
|
||||
extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
|
||||
extern unsigned int sysctl_sched_tunable_scaling;
|
||||
|
||||
extern unsigned int sysctl_numa_balancing_scan_delay;
|
||||
extern unsigned int sysctl_numa_balancing_scan_period_min;
|
||||
|
|
@ -48,8 +49,8 @@ extern unsigned int sysctl_numa_balancing_scan_size;
|
|||
extern __read_mostly unsigned int sysctl_sched_migration_cost;
|
||||
extern __read_mostly unsigned int sysctl_sched_nr_migrate;
|
||||
|
||||
int sched_proc_update_handler(struct ctl_table *table, int write,
|
||||
void *buffer, size_t *length, loff_t *ppos);
|
||||
extern int sysctl_resched_latency_warn_ms;
|
||||
extern int sysctl_resched_latency_warn_once;
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -266,6 +266,7 @@ static inline void init_sigpending(struct sigpending *sig)
|
|||
}
|
||||
|
||||
extern void flush_sigqueue(struct sigpending *queue);
|
||||
extern void exit_task_sigqueue_cache(struct task_struct *tsk);
|
||||
|
||||
/* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
|
||||
static inline int valid_signal(unsigned long sig)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue