Merge branch 'core/rcu' into sched/core, to pick up dependency

We are going to rely on the loosening of RCU callback semantics,
introduced by this commit:

  806f04e9fd: ("rcu: Allow for smp_call_function() running callbacks from idle")

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2020-05-28 10:52:53 +02:00
commit 58ef57b16d
68 changed files with 2522 additions and 934 deletions

View file

@ -120,12 +120,65 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
/* Annotate a C jump table to allow objtool to follow the code flow */
#define __annotate_jump_table __section(.rodata..c_jump_table)
#ifdef CONFIG_DEBUG_ENTRY
/* Begin/end of an instrumentation safe region */
#define instrumentation_begin() ({ \
asm volatile("%c0:\n\t" \
".pushsection .discard.instr_begin\n\t" \
".long %c0b - .\n\t" \
".popsection\n\t" : : "i" (__COUNTER__)); \
})
/*
* Because instrumentation_{begin,end}() can nest, objtool validation considers
* _begin() a +1 and _end() a -1 and computes a sum over the instructions.
* When the value is greater than 0, we consider instrumentation allowed.
*
* There is a problem with code like:
*
* noinstr void foo()
* {
* instrumentation_begin();
* ...
* if (cond) {
* instrumentation_begin();
* ...
* instrumentation_end();
* }
* bar();
* instrumentation_end();
* }
*
* If instrumentation_end() would be an empty label, like all the other
* annotations, the inner _end(), which is at the end of a conditional block,
* would land on the instruction after the block.
*
* If we then consider the sum of the !cond path, we'll see that the call to
* bar() is with a 0-value, even though, we meant it to happen with a positive
* value.
*
* To avoid this, have _end() be a NOP instruction, this ensures it will be
* part of the condition block and does not escape.
*/
#define instrumentation_end() ({ \
asm volatile("%c0: nop\n\t" \
".pushsection .discard.instr_end\n\t" \
".long %c0b - .\n\t" \
".popsection\n\t" : : "i" (__COUNTER__)); \
})
#endif /* CONFIG_DEBUG_ENTRY */
#else
#define annotate_reachable()
#define annotate_unreachable()
#define __annotate_jump_table
#endif
#ifndef instrumentation_begin
#define instrumentation_begin() do { } while(0)
#define instrumentation_end() do { } while(0)
#endif
#ifndef ASM_UNREACHABLE
# define ASM_UNREACHABLE
#endif

View file

@ -118,6 +118,10 @@ struct ftrace_likely_data {
#define notrace __attribute__((__no_instrument_function__))
#endif
/* Section for code which can't be instrumented at all */
#define noinstr \
noinline notrace __attribute((__section__(".noinstr.text")))
/*
* it doesn't make sense on ARM (currently the only user of __naked)
* to trace naked functions because then mcount is called without

View file

@ -2,15 +2,6 @@
#ifndef _LINUX_FTRACE_IRQ_H
#define _LINUX_FTRACE_IRQ_H
#ifdef CONFIG_FTRACE_NMI_ENTER
extern void arch_ftrace_nmi_enter(void);
extern void arch_ftrace_nmi_exit(void);
#else
static inline void arch_ftrace_nmi_enter(void) { }
static inline void arch_ftrace_nmi_exit(void) { }
#endif
#ifdef CONFIG_HWLAT_TRACER
extern bool trace_hwlat_callback_enabled;
extern void trace_hwlat_callback(bool enter);
@ -22,12 +13,10 @@ static inline void ftrace_nmi_enter(void)
if (trace_hwlat_callback_enabled)
trace_hwlat_callback(true);
#endif
arch_ftrace_nmi_enter();
}
static inline void ftrace_nmi_exit(void)
{
arch_ftrace_nmi_exit();
#ifdef CONFIG_HWLAT_TRACER
if (trace_hwlat_callback_enabled)
trace_hwlat_callback(false);

View file

@ -65,14 +65,26 @@ extern void irq_exit(void);
#define arch_nmi_exit() do { } while (0)
#endif
/*
* NMI vs Tracing
* --------------
*
* We must not land in a tracer until (or after) we've changed preempt_count
* such that in_nmi() becomes true. To that effect all NMI C entry points must
* be marked 'notrace' and call nmi_enter() as soon as possible.
*/
/*
* nmi_enter() can nest up to 15 times; see NMI_BITS.
*/
#define nmi_enter() \
do { \
arch_nmi_enter(); \
printk_nmi_enter(); \
lockdep_off(); \
ftrace_nmi_enter(); \
BUG_ON(in_nmi()); \
preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
BUG_ON(in_nmi() == NMI_MASK); \
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
rcu_nmi_enter(); \
lockdep_hardirq_enter(); \
} while (0)
@ -82,7 +94,7 @@ extern void irq_exit(void);
lockdep_hardirq_exit(); \
rcu_nmi_exit(); \
BUG_ON(!in_nmi()); \
preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
__preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
ftrace_nmi_exit(); \
lockdep_on(); \
printk_nmi_exit(); \

View file

@ -308,8 +308,27 @@ extern void lockdep_set_selftest_task(struct task_struct *task);
extern void lockdep_init_task(struct task_struct *task);
extern void lockdep_off(void);
extern void lockdep_on(void);
/*
* Split the recrursion counter in two to readily detect 'off' vs recursion.
*/
#define LOCKDEP_RECURSION_BITS 16
#define LOCKDEP_OFF (1U << LOCKDEP_RECURSION_BITS)
#define LOCKDEP_RECURSION_MASK (LOCKDEP_OFF - 1)
/*
* lockdep_{off,on}() are macros to avoid tracing and kprobes; not inlines due
* to header dependencies.
*/
#define lockdep_off() \
do { \
current->lockdep_recursion += LOCKDEP_OFF; \
} while (0)
#define lockdep_on() \
do { \
current->lockdep_recursion -= LOCKDEP_OFF; \
} while (0)
extern void lockdep_register_key(struct lock_class_key *key);
extern void lockdep_unregister_key(struct lock_class_key *key);

View file

@ -26,13 +26,13 @@
* PREEMPT_MASK: 0x000000ff
* SOFTIRQ_MASK: 0x0000ff00
* HARDIRQ_MASK: 0x000f0000
* NMI_MASK: 0x00100000
* NMI_MASK: 0x00f00000
* PREEMPT_NEED_RESCHED: 0x80000000
*/
#define PREEMPT_BITS 8
#define SOFTIRQ_BITS 8
#define HARDIRQ_BITS 4
#define NMI_BITS 1
#define NMI_BITS 4
#define PREEMPT_SHIFT 0
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)

View file

@ -371,7 +371,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the list_head within the struct.
* @cond...: optional lockdep expression if called from non-RCU protection.
* @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as list_add_rcu()
@ -646,7 +646,7 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_node within the struct.
* @cond...: optional lockdep expression if called from non-RCU protection.
* @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as hlist_add_head_rcu()

View file

@ -37,6 +37,7 @@
/* Exported common interfaces */
void call_rcu(struct rcu_head *head, rcu_callback_t func);
void rcu_barrier_tasks(void);
void rcu_barrier_tasks_rude(void);
void synchronize_rcu(void);
#ifdef CONFIG_PREEMPT_RCU
@ -129,25 +130,57 @@ static inline void rcu_init_nohz(void) { }
* Note a quasi-voluntary context switch for RCU-tasks's benefit.
* This is a macro rather than an inline function to avoid #include hell.
*/
#ifdef CONFIG_TASKS_RCU
#define rcu_tasks_qs(t) \
do { \
if (READ_ONCE((t)->rcu_tasks_holdout)) \
WRITE_ONCE((t)->rcu_tasks_holdout, false); \
#ifdef CONFIG_TASKS_RCU_GENERIC
# ifdef CONFIG_TASKS_RCU
# define rcu_tasks_classic_qs(t, preempt) \
do { \
if (!(preempt) && READ_ONCE((t)->rcu_tasks_holdout)) \
WRITE_ONCE((t)->rcu_tasks_holdout, false); \
} while (0)
#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t)
void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
void synchronize_rcu_tasks(void);
# else
# define rcu_tasks_classic_qs(t, preempt) do { } while (0)
# define call_rcu_tasks call_rcu
# define synchronize_rcu_tasks synchronize_rcu
# endif
# ifdef CONFIG_TASKS_RCU_TRACE
# define rcu_tasks_trace_qs(t) \
do { \
if (!likely(READ_ONCE((t)->trc_reader_checked)) && \
!unlikely(READ_ONCE((t)->trc_reader_nesting))) { \
smp_store_release(&(t)->trc_reader_checked, true); \
smp_mb(); /* Readers partitioned by store. */ \
} \
} while (0)
# else
# define rcu_tasks_trace_qs(t) do { } while (0)
# endif
#define rcu_tasks_qs(t, preempt) \
do { \
rcu_tasks_classic_qs((t), (preempt)); \
rcu_tasks_trace_qs((t)); \
} while (0)
# ifdef CONFIG_TASKS_RUDE_RCU
void call_rcu_tasks_rude(struct rcu_head *head, rcu_callback_t func);
void synchronize_rcu_tasks_rude(void);
# endif
#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
void exit_tasks_rcu_start(void);
void exit_tasks_rcu_finish(void);
#else /* #ifdef CONFIG_TASKS_RCU */
#define rcu_tasks_qs(t) do { } while (0)
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
#define rcu_tasks_qs(t, preempt) do { } while (0)
#define rcu_note_voluntary_context_switch(t) do { } while (0)
#define call_rcu_tasks call_rcu
#define synchronize_rcu_tasks synchronize_rcu
static inline void exit_tasks_rcu_start(void) { }
static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU */
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
/**
* cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
@ -158,7 +191,7 @@ static inline void exit_tasks_rcu_finish(void) { }
*/
#define cond_resched_tasks_rcu_qs() \
do { \
rcu_tasks_qs(current); \
rcu_tasks_qs(current, false); \
cond_resched(); \
} while (0)

View file

@ -0,0 +1,88 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Read-Copy Update mechanism for mutual exclusion, adapted for tracing.
*
* Copyright (C) 2020 Paul E. McKenney.
*/
#ifndef __LINUX_RCUPDATE_TRACE_H
#define __LINUX_RCUPDATE_TRACE_H
#include <linux/sched.h>
#include <linux/rcupdate.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
extern struct lockdep_map rcu_trace_lock_map;
static inline int rcu_read_lock_trace_held(void)
{
return lock_is_held(&rcu_trace_lock_map);
}
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
static inline int rcu_read_lock_trace_held(void)
{
return 1;
}
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#ifdef CONFIG_TASKS_TRACE_RCU
void rcu_read_unlock_trace_special(struct task_struct *t, int nesting);
/**
* rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
*
* When synchronize_rcu_trace() is invoked by one task, then that task
* is guaranteed to block until all other tasks exit their read-side
* critical sections. Similarly, if call_rcu_trace() is invoked on one
* task while other tasks are within RCU read-side critical sections,
* invocation of the corresponding RCU callback is deferred until after
* the all the other tasks exit their critical sections.
*
* For more details, please see the documentation for rcu_read_lock().
*/
static inline void rcu_read_lock_trace(void)
{
struct task_struct *t = current;
WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
t->trc_reader_special.b.need_mb)
smp_mb(); // Pairs with update-side barriers
rcu_lock_acquire(&rcu_trace_lock_map);
}
/**
* rcu_read_unlock_trace - mark end of RCU-trace read-side critical section
*
* Pairs with a preceding call to rcu_read_lock_trace(), and nesting is
* allowed. Invoking a rcu_read_unlock_trace() when there is no matching
* rcu_read_lock_trace() is verboten, and will result in lockdep complaints.
*
* For more details, please see the documentation for rcu_read_unlock().
*/
static inline void rcu_read_unlock_trace(void)
{
int nesting;
struct task_struct *t = current;
rcu_lock_release(&rcu_trace_lock_map);
nesting = READ_ONCE(t->trc_reader_nesting) - 1;
if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) {
WRITE_ONCE(t->trc_reader_nesting, nesting);
return; // We assume shallow reader nesting.
}
rcu_read_unlock_trace_special(t, nesting);
}
void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
void synchronize_rcu_tasks_trace(void);
void rcu_barrier_tasks_trace(void);
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
#endif /* __LINUX_RCUPDATE_TRACE_H */

View file

@ -31,4 +31,23 @@ do { \
#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__)
/**
* synchronize_rcu_mult - Wait concurrently for multiple grace periods
* @...: List of call_rcu() functions for different grace periods to wait on
*
* This macro waits concurrently for multiple types of RCU grace periods.
* For example, synchronize_rcu_mult(call_rcu, call_rcu_tasks) would wait
* on concurrent RCU and RCU-tasks grace periods. Waiting on a given SRCU
* domain requires you to write a wrapper function for that SRCU domain's
* call_srcu() function, with this wrapper supplying the pointer to the
* corresponding srcu_struct.
*
* The first argument tells Tiny RCU's _wait_rcu_gp() not to
* bother waiting for RCU. The reason for this is because anywhere
* synchronize_rcu_mult() can be called is automatically already a full
* grace period.
*/
#define synchronize_rcu_mult(...) \
_wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__)
#endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */

View file

@ -49,7 +49,7 @@ static inline void rcu_softirq_qs(void)
#define rcu_note_context_switch(preempt) \
do { \
rcu_qs(); \
rcu_tasks_qs(current); \
rcu_tasks_qs(current, (preempt)); \
} while (0)
static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
@ -71,6 +71,7 @@ static inline void rcu_irq_enter(void) { }
static inline void rcu_irq_exit_irqson(void) { }
static inline void rcu_irq_enter_irqson(void) { }
static inline void rcu_irq_exit(void) { }
static inline void rcu_irq_exit_preempt(void) { }
static inline void exit_rcu(void) { }
static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
@ -85,8 +86,10 @@ static inline void rcu_scheduler_starting(void) { }
static inline void rcu_end_inkernel_boot(void) { }
static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
static inline bool rcu_is_watching(void) { return true; }
static inline bool __rcu_is_watching(void) { return true; }
static inline void rcu_momentary_dyntick_idle(void) { }
static inline void kfree_rcu_scheduler_running(void) { }
static inline bool rcu_gp_might_be_stalled(void) { return false; }
/* Avoid RCU read-side critical sections leaking across. */
static inline void rcu_all_qs(void) { barrier(); }

View file

@ -39,6 +39,7 @@ void rcu_barrier(void);
bool rcu_eqs_special_set(int cpu);
void rcu_momentary_dyntick_idle(void);
void kfree_rcu_scheduler_running(void);
bool rcu_gp_might_be_stalled(void);
unsigned long get_state_synchronize_rcu(void);
void cond_synchronize_rcu(unsigned long oldstate);
@ -46,6 +47,7 @@ void rcu_idle_enter(void);
void rcu_idle_exit(void);
void rcu_irq_enter(void);
void rcu_irq_exit(void);
void rcu_irq_exit_preempt(void);
void rcu_irq_enter_irqson(void);
void rcu_irq_exit_irqson(void);
@ -56,6 +58,7 @@ extern int rcu_scheduler_active __read_mostly;
void rcu_end_inkernel_boot(void);
bool rcu_inkernel_boot_has_ended(void);
bool rcu_is_watching(void);
bool __rcu_is_watching(void);
#ifndef CONFIG_PREEMPTION
void rcu_all_qs(void);
#endif

View file

@ -613,7 +613,7 @@ union rcu_special {
u8 blocked;
u8 need_qs;
u8 exp_hint; /* Hint for performance. */
u8 deferred_qs;
u8 need_mb; /* Readers need smp_mb(). */
} b; /* Bits. */
u32 s; /* Set of bits. */
};
@ -724,6 +724,14 @@ struct task_struct {
struct list_head rcu_tasks_holdout_list;
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU
int trc_reader_nesting;
int trc_ipi_to_cpu;
union rcu_special trc_reader_special;
bool trc_reader_checked;
struct list_head trc_holdout_list;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
struct sched_info sched_info;
struct list_head tasks;
@ -1289,6 +1297,12 @@ struct task_struct {
unsigned long prev_lowest_stack;
#endif
#ifdef CONFIG_X86_MCE
u64 mce_addr;
u64 mce_status;
struct callback_head mce_kill_me;
#endif
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.

View file

@ -89,7 +89,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp);
#ifdef CONFIG_PREEMPTION
#define torture_preempt_schedule() preempt_schedule()
#else
#define torture_preempt_schedule()
#define torture_preempt_schedule() do { } while (0)
#endif
#endif /* __LINUX_TORTURE_H */

View file

@ -1149,4 +1149,6 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
(wait)->flags = 0; \
} while (0)
bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg);
#endif /* _LINUX_WAIT_H */