Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-devel

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-devel: (62 commits)
  sched: build fix
  sched: better rt-group documentation
  sched: features fix
  sched: /debug/sched_features
  sched: add SCHED_FEAT_DEADLINE
  sched: debug: show a weight tree
  sched: fair: weight calculations
  sched: fair-group: de-couple load-balancing from the rb-trees
  sched: fair-group scheduling vs latency
  sched: rt-group: optimize dequeue_rt_stack
  sched: debug: add some debug code to handle the full hierarchy
  sched: fair-group: SMP-nice for group scheduling
  sched, cpuset: customize sched domains, core
  sched, cpuset: customize sched domains, docs
  sched: preparatory code movement
  sched: rt: multi level group constraints
  sched: task_group hierarchy
  sched: fix the task_group hierarchy for UID grouping
  sched: allow the group scheduler to have multiple levels
  sched: mix tasks and groups
  ...
This commit is contained in:
Linus Torvalds 2008-04-21 15:40:24 -07:00
commit ec965350bb
68 changed files with 3174 additions and 1014 deletions

View file

@ -108,6 +108,7 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits);
extern int bitmap_scnprintf(char *buf, unsigned int len,
const unsigned long *src, int nbits);
extern int bitmap_scnprintf_len(unsigned int len);
extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
unsigned long *dst, int nbits);
extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,

View file

@ -222,8 +222,13 @@ int __next_cpu(int n, const cpumask_t *srcp);
#define next_cpu(n, src) ({ (void)(src); 1; })
#endif
#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
extern cpumask_t *cpumask_of_cpu_map;
#define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu])
#else
#define cpumask_of_cpu(cpu) \
({ \
(*({ \
typeof(_unused_cpumask_arg_) m; \
if (sizeof(m) == sizeof(unsigned long)) { \
m.bits[0] = 1UL<<(cpu); \
@ -231,8 +236,9 @@ int __next_cpu(int n, const cpumask_t *srcp);
cpus_clear(m); \
cpu_set((cpu), m); \
} \
m; \
})
&m; \
}))
#endif
#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
@ -243,6 +249,8 @@ int __next_cpu(int n, const cpumask_t *srcp);
[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
} }
#define CPU_MASK_ALL_PTR (&CPU_MASK_ALL)
#else
#define CPU_MASK_ALL \
@ -251,6 +259,10 @@ int __next_cpu(int n, const cpumask_t *srcp);
[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
} }
/* cpu_mask_all is in init/main.c */
extern cpumask_t cpu_mask_all;
#define CPU_MASK_ALL_PTR (&cpu_mask_all)
#endif
#define CPU_MASK_NONE \
@ -273,6 +285,13 @@ static inline int __cpumask_scnprintf(char *buf, int len,
return bitmap_scnprintf(buf, len, srcp->bits, nbits);
}
/*
 * cpumask_scnprintf_len(len) - cpumask front end for bitmap_scnprintf_len().
 *
 * The macro expands to a call of the inline helper below, which hands @len
 * to bitmap_scnprintf_len() and returns that function's result unchanged.
 * @len is an int here but bitmap_scnprintf_len() is declared as taking an
 * unsigned int, so the value is converted at the call.
 * NOTE(review): what the returned length represents is defined by
 * bitmap_scnprintf_len(), which is only declared in this view - confirm
 * against its definition.
 */
#define cpumask_scnprintf_len(len) \
__cpumask_scnprintf_len((len))
static inline int __cpumask_scnprintf_len(int len)
{
return bitmap_scnprintf_len(len);
}
#define cpumask_parse_user(ubuf, ulen, dst) \
__cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS)
static inline int __cpumask_parse_user(const char __user *buf, int len,

View file

@ -20,8 +20,8 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
extern int cpuset_init_early(void);
extern int cpuset_init(void);
extern void cpuset_init_smp(void);
extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p);
extern void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask);
extern void cpuset_cpus_allowed_locked(struct task_struct *p, cpumask_t *mask);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
#define cpuset_current_mems_allowed (current->mems_allowed)
void cpuset_init_current_mems_allowed(void);
@ -84,13 +84,14 @@ static inline int cpuset_init_early(void) { return 0; }
static inline int cpuset_init(void) { return 0; }
static inline void cpuset_init_smp(void) {}
static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
static inline void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask)
{
return cpu_possible_map;
*mask = cpu_possible_map;
}
static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p)
static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
cpumask_t *mask)
{
return cpu_possible_map;
*mask = cpu_possible_map;
}
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)

View file

@ -151,6 +151,9 @@ extern struct group_info init_groups;
.cpus_allowed = CPU_MASK_ALL, \
.mm = NULL, \
.active_mm = &init_mm, \
.se = { \
.group_node = LIST_HEAD_INIT(tsk.se.group_node), \
}, \
.rt = { \
.run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
.time_slice = HZ, \

View file

@ -327,4 +327,10 @@ extern void ktime_get_ts(struct timespec *ts);
/* Get the real (wall-) time in timespec format: */
#define ktime_get_real_ts(ts) getnstimeofday(ts)
/*
 * ns_to_ktime - build a ktime_t from a nanosecond count.
 * @ns: number of nanoseconds.
 *
 * Returns the result of ktime_add_ns() applied to a zero-valued ktime_t
 * and @ns.
 */
static inline ktime_t ns_to_ktime(u64 ns)
{
/* Zero starting point (.tv64 == 0) that @ns is added to. */
static const ktime_t ktime_zero = { .tv64 = 0 };
return ktime_add_ns(ktime_zero, ns);
}
#endif

View file

@ -704,6 +704,7 @@ enum cpu_idle_type {
#define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */
#define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 1024 /* Only a single load balancing instance */
#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */
#define BALANCE_FOR_MC_POWER \
(sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0)
@ -733,12 +734,31 @@ struct sched_group {
u32 reciprocal_cpu_power;
};
/*
 * Type of the 'level' member of struct sched_domain.
 * Enumerators are numbered consecutively starting from SD_LV_NONE == 0.
 * NOTE(review): the names suggest SMT sibling, multi-core, physical CPU,
 * NUMA node and all-nodes levels respectively - confirm against the code
 * that assigns sched_domain.level.
 */
enum sched_domain_level {
SD_LV_NONE = 0,
SD_LV_SIBLING,
SD_LV_MC,
SD_LV_CPU,
SD_LV_NODE,
SD_LV_ALLNODES,
SD_LV_MAX /* last enumerator, one greater than SD_LV_ALLNODES */
};
/*
 * Attribute block handed to partition_sched_domains() through its
 * dattr_new parameter.
 */
struct sched_domain_attr {
int relax_domain_level;
};
/*
 * Compound-literal initializer for struct sched_domain_attr; sets
 * relax_domain_level to -1.
 * NOTE(review): -1 presumably means "no relax level requested" - confirm
 * against the code that reads relax_domain_level.
 */
#define SD_ATTR_INIT (struct sched_domain_attr) { \
.relax_domain_level = -1, \
}
struct sched_domain {
/* These fields must be setup */
struct sched_domain *parent; /* top domain must be null terminated */
struct sched_domain *child; /* bottom domain must be null terminated */
struct sched_group *groups; /* the balancing groups of the domain */
cpumask_t span; /* span of all CPUs in this domain */
int first_cpu; /* cache of the first cpu in this domain */
unsigned long min_interval; /* Minimum balance interval ms */
unsigned long max_interval; /* Maximum balance interval ms */
unsigned int busy_factor; /* less balancing by factor if busy */
@ -750,6 +770,7 @@ struct sched_domain {
unsigned int wake_idx;
unsigned int forkexec_idx;
int flags; /* See SD_* */
enum sched_domain_level level;
/* Runtime fields. */
unsigned long last_balance; /* init to jiffies. units in jiffies */
@ -789,7 +810,8 @@ struct sched_domain {
#endif
};
extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new);
extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
struct sched_domain_attr *dattr_new);
extern int arch_reinit_sched_domains(void);
#endif /* CONFIG_SMP */
@ -889,7 +911,8 @@ struct sched_class {
void (*set_curr_task) (struct rq *rq);
void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
void (*task_new) (struct rq *rq, struct task_struct *p);
void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask);
void (*set_cpus_allowed)(struct task_struct *p,
const cpumask_t *newmask);
void (*join_domain)(struct rq *rq);
void (*leave_domain)(struct rq *rq);
@ -923,6 +946,7 @@ struct load_weight {
struct sched_entity {
struct load_weight load; /* for load-balancing */
struct rb_node run_node;
struct list_head group_node;
unsigned int on_rq;
u64 exec_start;
@ -982,6 +1006,7 @@ struct sched_rt_entity {
unsigned long timeout;
int nr_cpus_allowed;
struct sched_rt_entity *back;
#ifdef CONFIG_RT_GROUP_SCHED
struct sched_rt_entity *parent;
/* rq on which this entity is (to be) queued: */
@ -1502,15 +1527,21 @@ static inline void put_task_struct(struct task_struct *t)
#define used_math() tsk_used_math(current)
#ifdef CONFIG_SMP
extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p,
const cpumask_t *new_mask);
#else
static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
static inline int set_cpus_allowed_ptr(struct task_struct *p,
const cpumask_t *new_mask)
{
if (!cpu_isset(0, new_mask))
if (!cpu_isset(0, *new_mask))
return -EINVAL;
return 0;
}
#endif
/*
 * set_cpus_allowed - by-value wrapper around set_cpus_allowed_ptr().
 * @p: task whose allowed-CPU mask is being set.
 * @new_mask: requested mask, passed by value (i.e. copied for this call).
 *
 * Passes the address of the local copy of @new_mask to
 * set_cpus_allowed_ptr() and returns its result. It is defined after the
 * CONFIG_SMP #ifdef/#else/#endif, so it is built in both configurations.
 */
static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
{
return set_cpus_allowed_ptr(p, &new_mask);
}
extern unsigned long long sched_clock(void);
@ -1551,7 +1582,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_batch_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
@ -1564,6 +1594,10 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;
int sched_rt_handler(struct ctl_table *table, int write,
struct file *filp, void __user *buffer, size_t *lenp,
loff_t *ppos);
extern unsigned int sysctl_sched_compat_yield;
#ifdef CONFIG_RT_MUTEXES
@ -2031,7 +2065,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm)
}
#endif
extern long sched_setaffinity(pid_t pid, cpumask_t new_mask);
extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
extern int sched_mc_power_savings, sched_smt_power_savings;
@ -2041,8 +2075,11 @@ extern void normalize_rt_tasks(void);
#ifdef CONFIG_GROUP_SCHED
extern struct task_group init_task_group;
#ifdef CONFIG_USER_SCHED
extern struct task_group root_task_group;
#endif
extern struct task_group *sched_create_group(void);
extern struct task_group *sched_create_group(struct task_group *parent);
extern void sched_destroy_group(struct task_group *tg);
extern void sched_move_task(struct task_struct *tsk);
#ifdef CONFIG_FAIR_GROUP_SCHED
@ -2053,6 +2090,9 @@ extern unsigned long sched_group_shares(struct task_group *tg);
extern int sched_group_set_rt_runtime(struct task_group *tg,
long rt_runtime_us);
extern long sched_group_rt_runtime(struct task_group *tg);
extern int sched_group_set_rt_period(struct task_group *tg,
long rt_period_us);
extern long sched_group_rt_period(struct task_group *tg);
#endif
#endif

View file

@ -45,12 +45,16 @@ struct sysdev_class_attribute {
ssize_t (*store)(struct sysdev_class *, const char *, size_t);
};
#define SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) \
struct sysdev_class_attribute attr_##_name = { \
#define _SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) \
{ \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.show = _show, \
.store = _store, \
};
}
#define SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) \
struct sysdev_class_attribute attr_##_name = \
_SYSDEV_CLASS_ATTR(_name,_mode,_show,_store)
extern int sysdev_class_register(struct sysdev_class *);
@ -100,15 +104,16 @@ struct sysdev_attribute {
};
#define _SYSDEV_ATTR(_name,_mode,_show,_store) \
#define _SYSDEV_ATTR(_name, _mode, _show, _store) \
{ \
.attr = { .name = __stringify(_name), .mode = _mode }, \
.show = _show, \
.store = _store, \
}
#define SYSDEV_ATTR(_name,_mode,_show,_store) \
struct sysdev_attribute attr_##_name = _SYSDEV_ATTR(_name,_mode,_show,_store);
#define SYSDEV_ATTR(_name, _mode, _show, _store) \
struct sysdev_attribute attr_##_name = \
_SYSDEV_ATTR(_name, _mode, _show, _store);
extern int sysdev_create_file(struct sys_device *, struct sysdev_attribute *);
extern void sysdev_remove_file(struct sys_device *, struct sysdev_attribute *);

View file

@ -38,16 +38,15 @@
#endif
#ifndef nr_cpus_node
#define nr_cpus_node(node) \
({ \
cpumask_t __tmp__; \
__tmp__ = node_to_cpumask(node); \
cpus_weight(__tmp__); \
#define nr_cpus_node(node) \
({ \
node_to_cpumask_ptr(__tmp__, node); \
cpus_weight(*__tmp__); \
})
#endif
#define for_each_node_with_cpus(node) \
for_each_online_node(node) \
#define for_each_node_with_cpus(node) \
for_each_online_node(node) \
if (nr_cpus_node(node))
void arch_update_cpu_topology(void);
@ -80,7 +79,9 @@ void arch_update_cpu_topology(void);
* by defining their own arch-specific initializer in include/asm/topology.h.
* A definition there will automagically override these default initializers
* and allow arch-specific performance tuning of sched_domains.
* (Only non-zero and non-null fields need be specified.)
*/
#ifdef CONFIG_SCHED_SMT
/* MCD - Do we really need this? It is always on if CONFIG_SCHED_SMT is,
* so can't we drop this in favor of CONFIG_SCHED_SMT?
@ -89,20 +90,10 @@ void arch_update_cpu_topology(void);
/* Common values for SMT siblings */
#ifndef SD_SIBLING_INIT
#define SD_SIBLING_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
.min_interval = 1, \
.max_interval = 2, \
.busy_factor = 64, \
.imbalance_pct = 110, \
.cache_nice_tries = 0, \
.busy_idx = 0, \
.idle_idx = 0, \
.newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 0, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_FORK \
@ -112,7 +103,6 @@ void arch_update_cpu_topology(void);
| SD_SHARE_CPUPOWER, \
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
}
#endif
#endif /* CONFIG_SCHED_SMT */
@ -121,18 +111,12 @@ void arch_update_cpu_topology(void);
/* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */
#ifndef SD_MC_INIT
#define SD_MC_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
.min_interval = 1, \
.max_interval = 4, \
.busy_factor = 64, \
.imbalance_pct = 125, \
.cache_nice_tries = 1, \
.busy_idx = 2, \
.idle_idx = 0, \
.newidle_idx = 0, \
.wake_idx = 1, \
.forkexec_idx = 1, \
.flags = SD_LOAD_BALANCE \
@ -144,7 +128,6 @@ void arch_update_cpu_topology(void);
| BALANCE_FOR_MC_POWER, \
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
}
#endif
#endif /* CONFIG_SCHED_MC */
@ -152,10 +135,6 @@ void arch_update_cpu_topology(void);
/* Common values for CPUs */
#ifndef SD_CPU_INIT
#define SD_CPU_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
.min_interval = 1, \
.max_interval = 4, \
.busy_factor = 64, \
@ -174,16 +153,11 @@ void arch_update_cpu_topology(void);
| BALANCE_FOR_PKG_POWER,\
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
}
#endif
/* sched_domains SD_ALLNODES_INIT for NUMA machines */
#define SD_ALLNODES_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
.min_interval = 64, \
.max_interval = 64*num_online_cpus(), \
.busy_factor = 128, \
@ -191,14 +165,10 @@ void arch_update_cpu_topology(void);
.cache_nice_tries = 1, \
.busy_idx = 3, \
.idle_idx = 3, \
.newidle_idx = 0, /* unused */ \
.wake_idx = 0, /* unused */ \
.forkexec_idx = 0, /* unused */ \
.flags = SD_LOAD_BALANCE \
| SD_SERIALIZE, \
.last_balance = jiffies, \
.balance_interval = 64, \
.nr_balance_failed = 0, \
}
#ifdef CONFIG_NUMA