Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar: - Apply a number of membarrier related fixes and cleanups, which fixes a use-after-free race in the membarrier code - Introduce proper RCU protection for tasks on the runqueue - to get rid of the subtle task_rcu_dereference() interface that was easy to get wrong - Misc fixes, but also an EAS speedup * 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/fair: Avoid redundant EAS calculation sched/core: Remove double update_max_interval() call on CPU startup sched/core: Fix preempt_schedule() interrupt return comment sched/fair: Fix -Wunused-but-set-variable warnings sched/core: Fix migration to invalid CPU in __set_cpus_allowed_ptr() sched/membarrier: Return -ENOMEM to userspace on memory allocation failure sched/membarrier: Skip IPIs when mm->mm_users == 1 selftests, sched/membarrier: Add multi-threaded test sched/membarrier: Fix p->mm->membarrier_state racy load sched/membarrier: Call sync_core only before usermode for same mm sched/membarrier: Remove redundant check sched/membarrier: Fix private expedited registration check tasks, sched/core: RCUify the assignment of rq->curr tasks, sched/core: With a grace period after finish_task_switch(), remove unnecessary code tasks, sched/core: Ensure tasks are available for a grace period after leaving the runqueue tasks: Add a count of task RCU users sched/core: Convert vcpu_is_preempted() from macro to an inline function sched/fair: Remove unused cfs_rq_clock_task() function
2019-09-28 12:39:07 -07:00 · 2019-09-28 12:39:07 -07:00 · 9c5efe9ae7
commit 9c5efe9ae7
parent aefcf2f4b5 4892f51ad5
17 changed files with 375 additions and 250 deletions
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@ -383,6 +383,16 @@ struct mm_struct {
 		unsigned long highest_vm_end;	/* highest vma end address */
 		pgd_t * pgd;

+#ifdef CONFIG_MEMBARRIER
+		/**
+		 * @membarrier_state: Flags controlling membarrier behavior.
+		 *
+		 * This field is close to @pgd to hopefully fit in the same
+		 * cache-line, which needs to be touched by switch_mm().
+		 */
+		atomic_t membarrier_state;
+#endif
+
 		/**
 		 * @mm_users: The number of users including userspace.
 		 *
@ -452,9 +462,7 @@ struct mm_struct {
 		unsigned long flags; /* Must use atomic bitops to access */

 		struct core_state *core_state; /* coredumping support */
-#ifdef CONFIG_MEMBARRIER
-		atomic_t membarrier_state;
-#endif
+
 #ifdef CONFIG_AIO
 		spinlock_t			ioctx_lock;
 		struct kioctx_table __rcu	*ioctx_table;
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@ -6,16 +6,11 @@

 /*
 * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
 *
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
 */
 struct rcuwait {
 	struct task_struct __rcu *task;
@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
 */
 #define rcuwait_wait_event(w, condition)				\
 ({									\
-	/*								\
-	 * Complain if we are called after do_exit()/exit_notify(),     \
-	 * as we cannot rely on the rcu critical region for the		\
-	 * wakeup side.							\
-	 */                                                             \
-	WARN_ON(current->exit_state);                                   \
-									\
 	rcu_assign_pointer((w)->task, current);				\
 	for (;;) {							\
 		/*							\
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@ -1130,7 +1130,10 @@ struct task_struct {

 	struct tlbflush_unmap_batch	tlb_ubc;

-	struct rcu_head			rcu;
+	union {
+		refcount_t		rcu_users;
+		struct rcu_head		rcu;
+	};

 	/* Cache last used pipe for splice(): */
 	struct pipe_inode_info		*splice_pipe;
@ -1839,7 +1842,10 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 * running or not.
 */
 #ifndef vcpu_is_preempted
-# define vcpu_is_preempted(cpu)	false
+static inline bool vcpu_is_preempted(int cpu)
+{
+	return false;
+}
 #endif

 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@ -362,16 +362,16 @@ enum {

 static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
 {
+	if (current->mm != mm)
+		return;
 	if (likely(!(atomic_read(&mm->membarrier_state) &
 		     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
 		return;
 	sync_core_before_usermode();
 }

-static inline void membarrier_execve(struct task_struct *t)
-{
-	atomic_set(&t->mm->membarrier_state, 0);
-}
+extern void membarrier_exec_mmap(struct mm_struct *mm);
+
 #else
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
 static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
@ -380,7 +380,7 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
 {
 }
 #endif
-static inline void membarrier_execve(struct task_struct *t)
+static inline void membarrier_exec_mmap(struct mm_struct *mm)
 {
 }
 static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@ -119,7 +119,7 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }

-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+void put_task_struct_rcu_user(struct task_struct *task);

 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
 extern int arch_task_struct_size __read_mostly;