Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:

 - Apply a number of membarrier related fixes and cleanups, which fixes
   a use-after-free race in the membarrier code

 - Introduce proper RCU protection for tasks on the runqueue - to get
   rid of the subtle task_rcu_dereference() interface that was easy to
   get wrong

 - Misc fixes, but also an EAS speedup

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Avoid redundant EAS calculation
  sched/core: Remove double update_max_interval() call on CPU startup
  sched/core: Fix preempt_schedule() interrupt return comment
  sched/fair: Fix -Wunused-but-set-variable warnings
  sched/core: Fix migration to invalid CPU in __set_cpus_allowed_ptr()
  sched/membarrier: Return -ENOMEM to userspace on memory allocation failure
  sched/membarrier: Skip IPIs when mm->mm_users == 1
  selftests, sched/membarrier: Add multi-threaded test
  sched/membarrier: Fix p->mm->membarrier_state racy load
  sched/membarrier: Call sync_core only before usermode for same mm
  sched/membarrier: Remove redundant check
  sched/membarrier: Fix private expedited registration check
  tasks, sched/core: RCUify the assignment of rq->curr
  tasks, sched/core: With a grace period after finish_task_switch(), remove unnecessary code
  tasks, sched/core: Ensure tasks are available for a grace period after leaving the runqueue
  tasks: Add a count of task RCU users
  sched/core: Convert vcpu_is_preempted() from macro to an inline function
  sched/fair: Remove unused cfs_rq_clock_task() function
This commit is contained in:
Linus Torvalds 2019-09-28 12:39:07 -07:00
commit 9c5efe9ae7
17 changed files with 375 additions and 250 deletions

View file

@ -383,6 +383,16 @@ struct mm_struct {
unsigned long highest_vm_end; /* highest vma end address */
pgd_t * pgd;
#ifdef CONFIG_MEMBARRIER
/**
* @membarrier_state: Flags controlling membarrier behavior.
*
* This field is close to @pgd to hopefully fit in the same
* cache-line, which needs to be touched by switch_mm().
*/
atomic_t membarrier_state;
#endif
/**
* @mm_users: The number of users including userspace.
*
@ -452,9 +462,7 @@ struct mm_struct {
unsigned long flags; /* Must use atomic bitops to access */
struct core_state *core_state; /* coredumping support */
#ifdef CONFIG_MEMBARRIER
atomic_t membarrier_state;
#endif
#ifdef CONFIG_AIO
spinlock_t ioctx_lock;
struct kioctx_table __rcu *ioctx_table;

View file

@ -6,16 +6,11 @@
/*
* rcuwait provides a way of blocking and waking up a single
* task in an rcu-safe manner; where it is forbidden to use
* after exit_notify(). task_struct is not properly rcu protected,
* unless dealing with rcu-aware lists, ie: find_task_by_*().
* task in an rcu-safe manner.
*
* Alternatively we have task_rcu_dereference(), but the return
* semantics have different implications which would break the
* wakeup side. The only time @task is non-nil is when a user is
* blocked (or checking if it needs to) on a condition, and reset
* as soon as we know that the condition has succeeded and are
* awoken.
* The only time @task is non-nil is when a user is blocked (or
* checking if it needs to) on a condition, and reset as soon as we
* know that the condition has succeeded and are awoken.
*/
struct rcuwait {
struct task_struct __rcu *task;
@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
*/
#define rcuwait_wait_event(w, condition) \
({ \
/* \
* Complain if we are called after do_exit()/exit_notify(), \
* as we cannot rely on the rcu critical region for the \
* wakeup side. \
*/ \
WARN_ON(current->exit_state); \
\
rcu_assign_pointer((w)->task, current); \
for (;;) { \
/* \

View file

@ -1130,7 +1130,10 @@ struct task_struct {
struct tlbflush_unmap_batch tlb_ubc;
struct rcu_head rcu;
union {
refcount_t rcu_users;
struct rcu_head rcu;
};
/* Cache last used pipe for splice(): */
struct pipe_inode_info *splice_pipe;
@ -1839,7 +1842,10 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
* running or not.
*/
#ifndef vcpu_is_preempted
# define vcpu_is_preempted(cpu) false
static inline bool vcpu_is_preempted(int cpu)
{
return false;
}
#endif
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);

View file

@ -362,16 +362,16 @@ enum {
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
if (current->mm != mm)
return;
if (likely(!(atomic_read(&mm->membarrier_state) &
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
return;
sync_core_before_usermode();
}
static inline void membarrier_execve(struct task_struct *t)
{
atomic_set(&t->mm->membarrier_state, 0);
}
extern void membarrier_exec_mmap(struct mm_struct *mm);
#else
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
@ -380,7 +380,7 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
{
}
#endif
static inline void membarrier_execve(struct task_struct *t)
static inline void membarrier_exec_mmap(struct mm_struct *mm)
{
}
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)

View file

@ -119,7 +119,7 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}
struct task_struct *task_rcu_dereference(struct task_struct **ptask);
void put_task_struct_rcu_user(struct task_struct *task);
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
extern int arch_task_struct_size __read_mostly;