Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:
 "The main changes in this cycle were:

   - Continued user-access cleanups in the futex code.

   - percpu-rwsem rewrite that uses its own waitqueue and atomic_t
     instead of an embedded rwsem. This addresses a couple of
     weaknesses, but the primary motivation was complications on the -rt
     kernel.

   - Introduce raw lock nesting detection on lockdep
     (CONFIG_PROVE_RAW_LOCK_NESTING=y), document the raw_lock vs. normal
     lock differences. This too originates from -rt.

   - Reuse lockdep zapped chain_hlocks entries, to conserve RAM
     footprint on distro-ish kernels running into the "BUG:
     MAX_LOCKDEP_CHAIN_HLOCKS too low!" depletion of the lockdep
     chain-entries pool.

   - Misc cleanups, smaller fixes and enhancements - see the changelog
     for details"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (55 commits)
  fs/buffer: Make BH_Uptodate_Lock bit_spin_lock a regular spinlock_t
  thermal/x86_pkg_temp: Make pkg_temp_lock a raw_spinlock_t
  Documentation/locking/locktypes: Minor copy editor fixes
  Documentation/locking/locktypes: Further clarifications and wordsmithing
  m68knommu: Remove mm.h include from uaccess_no.h
  x86: get rid of user_atomic_cmpxchg_inatomic()
  generic arch_futex_atomic_op_inuser() doesn't need access_ok()
  x86: don't reload after cmpxchg in unsafe_atomic_op2() loop
  x86: convert arch_futex_atomic_op_inuser() to user_access_begin/user_access_end()
  objtool: whitelist __sanitizer_cov_trace_switch()
  [parisc, s390, sparc64] no need for access_ok() in futex handling
  sh: no need of access_ok() in arch_futex_atomic_op_inuser()
  futex: arch_futex_atomic_op_inuser() calling conventions change
  completion: Use lockdep_assert_RT_in_threaded_ctx() in complete_all()
  lockdep: Add posixtimer context tracing bits
  lockdep: Annotate irq_work
  lockdep: Add hrtimer context tracing bits
  lockdep: Introduce wait-type checks
  completion: Use simple wait queues
  sched/swait: Prepare usage in completions
  ...
This commit is contained in:
Linus Torvalds 2020-03-30 16:17:15 -07:00
commit 4b9fd8a829
85 changed files with 1622 additions and 713 deletions

File diff suppressed because it is too large Load diff

View file

@ -106,6 +106,12 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ =
#define STACK_TRACE_HASH_SIZE 16384
#endif
/*
* Bit definitions for lock_chain.irq_context
*/
#define LOCK_CHAIN_SOFTIRQ_CONTEXT (1 << 0)
#define LOCK_CHAIN_HARDIRQ_CONTEXT (1 << 1)
#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
@ -124,17 +130,21 @@ extern const char *__get_key_name(const struct lockdep_subclass_key *key,
struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i);
extern unsigned long nr_lock_classes;
extern unsigned long nr_zapped_classes;
extern unsigned long nr_zapped_lock_chains;
extern unsigned long nr_list_entries;
long lockdep_next_lockchain(long i);
unsigned long lock_chain_count(void);
extern int nr_chain_hlocks;
extern unsigned long nr_stack_trace_entries;
extern unsigned int nr_hardirq_chains;
extern unsigned int nr_softirq_chains;
extern unsigned int nr_process_chains;
extern unsigned int max_lockdep_depth;
extern unsigned int nr_free_chain_hlocks;
extern unsigned int nr_lost_chain_hlocks;
extern unsigned int nr_large_chain_blocks;
extern unsigned int max_lockdep_depth;
extern unsigned int max_bfs_queue_depth;
#ifdef CONFIG_PROVE_LOCKING

View file

@ -128,15 +128,22 @@ static int lc_show(struct seq_file *m, void *v)
struct lock_chain *chain = v;
struct lock_class *class;
int i;
static const char * const irq_strs[] = {
[0] = "0",
[LOCK_CHAIN_HARDIRQ_CONTEXT] = "hardirq",
[LOCK_CHAIN_SOFTIRQ_CONTEXT] = "softirq",
[LOCK_CHAIN_SOFTIRQ_CONTEXT|
LOCK_CHAIN_HARDIRQ_CONTEXT] = "hardirq|softirq",
};
if (v == SEQ_START_TOKEN) {
if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)
if (!nr_free_chain_hlocks)
seq_printf(m, "(buggered) ");
seq_printf(m, "all lock chains:\n");
return 0;
}
seq_printf(m, "irq_context: %d\n", chain->irq_context);
seq_printf(m, "irq_context: %s\n", irq_strs[chain->irq_context]);
for (i = 0; i < chain->depth; i++) {
class = lock_chain_get_class(chain, i);
@ -271,8 +278,12 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
lock_chain_count(), MAX_LOCKDEP_CHAINS);
seq_printf(m, " dependency chain hlocks: %11d [max: %lu]\n",
nr_chain_hlocks, MAX_LOCKDEP_CHAIN_HLOCKS);
seq_printf(m, " dependency chain hlocks used: %11lu [max: %lu]\n",
MAX_LOCKDEP_CHAIN_HLOCKS -
(nr_free_chain_hlocks + nr_lost_chain_hlocks),
MAX_LOCKDEP_CHAIN_HLOCKS);
seq_printf(m, " dependency chain hlocks lost: %11u\n",
nr_lost_chain_hlocks);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
@ -336,6 +347,18 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
seq_printf(m, " debug_locks: %11u\n",
debug_locks);
/*
* Zappped classes and lockdep data buffers reuse statistics.
*/
seq_puts(m, "\n");
seq_printf(m, " zapped classes: %11lu\n",
nr_zapped_classes);
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " zapped lock chains: %11lu\n",
nr_zapped_lock_chains);
seq_printf(m, " large chain blocks: %11u\n",
nr_large_chain_blocks);
#endif
return 0;
}

View file

@ -85,7 +85,7 @@ void debug_mutex_init(struct mutex *lock, const char *name,
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map(&lock->dep_map, name, key, 0);
lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
lock->magic = lock;
}

View file

@ -1,27 +1,29 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/errno.h>
#include "rwsem.h"
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
const char *name, struct lock_class_key *rwsem_key)
const char *name, struct lock_class_key *key)
{
sem->read_count = alloc_percpu(int);
if (unlikely(!sem->read_count))
return -ENOMEM;
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
rcu_sync_init(&sem->rss);
__init_rwsem(&sem->rw_sem, name, rwsem_key);
rcuwait_init(&sem->writer);
sem->readers_block = 0;
init_waitqueue_head(&sem->waiters);
atomic_set(&sem->block, 0);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
@ -41,73 +43,139 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
__this_cpu_inc(*sem->read_count);
/*
* Due to having preemption disabled the decrement happens on
* the same CPU as the increment, avoiding the
* increment-on-one-CPU-and-decrement-on-another problem.
*
* If the reader misses the writer's assignment of readers_block, then
* the writer is guaranteed to see the reader's increment.
* If the reader misses the writer's assignment of sem->block, then the
* writer is guaranteed to see the reader's increment.
*
* Conversely, any readers that increment their sem->read_count after
* the writer looks are guaranteed to see the readers_block value,
* which in turn means that they are guaranteed to immediately
* decrement their sem->read_count, so that it doesn't matter that the
* writer missed them.
* the writer looks are guaranteed to see the sem->block value, which
* in turn means that they are guaranteed to immediately decrement
* their sem->read_count, so that it doesn't matter that the writer
* missed them.
*/
smp_mb(); /* A matches D */
/*
* If !readers_block the critical section starts here, matched by the
* If !sem->block the critical section starts here, matched by the
* release in percpu_up_write().
*/
if (likely(!smp_load_acquire(&sem->readers_block)))
return 1;
if (likely(!atomic_read_acquire(&sem->block)))
return true;
/*
* Per the above comment; we still have preemption disabled and
* will thus decrement on the same CPU as we incremented.
*/
__percpu_up_read(sem);
if (try)
return 0;
/*
* We either call schedule() in the wait, or we'll fall through
* and reschedule on the preempt_enable() in percpu_down_read().
*/
preempt_enable_no_resched();
/*
* Avoid lockdep for the down/up_read() we already have them.
*/
__down_read(&sem->rw_sem);
this_cpu_inc(*sem->read_count);
__up_read(&sem->rw_sem);
preempt_disable();
return 1;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);
void __percpu_up_read(struct percpu_rw_semaphore *sem)
{
smp_mb(); /* B matches C */
/*
* In other words, if they see our decrement (presumably to aggregate
* zero, as that is the only time it matters) they will also see our
* critical section.
*/
__this_cpu_dec(*sem->read_count);
/* Prod writer to recheck readers_active */
/* Prod writer to re-evaluate readers_active_check() */
rcuwait_wake_up(&sem->writer);
return false;
}
EXPORT_SYMBOL_GPL(__percpu_up_read);
static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
{
if (atomic_read(&sem->block))
return false;
return atomic_xchg(&sem->block, 1) == 0;
}
static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
{
if (reader) {
bool ret;
preempt_disable();
ret = __percpu_down_read_trylock(sem);
preempt_enable();
return ret;
}
return __percpu_down_write_trylock(sem);
}
/*
* The return value of wait_queue_entry::func means:
*
* <0 - error, wakeup is terminated and the error is returned
* 0 - no wakeup, a next waiter is tried
* >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
*
* We use EXCLUSIVE for both readers and writers to preserve FIFO order,
* and play games with the return value to allow waking multiple readers.
*
* Specifically, we wake readers until we've woken a single writer, or until a
* trylock fails.
*/
static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
unsigned int mode, int wake_flags,
void *key)
{
struct task_struct *p = get_task_struct(wq_entry->private);
bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
struct percpu_rw_semaphore *sem = key;
/* concurrent against percpu_down_write(), can get stolen */
if (!__percpu_rwsem_trylock(sem, reader))
return 1;
list_del_init(&wq_entry->entry);
smp_store_release(&wq_entry->private, NULL);
wake_up_process(p);
put_task_struct(p);
return !reader; /* wake (readers until) 1 writer */
}
static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
{
DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
bool wait;
spin_lock_irq(&sem->waiters.lock);
/*
* Serialize against the wakeup in percpu_up_write(), if we fail
* the trylock, the wakeup must see us on the list.
*/
wait = !__percpu_rwsem_trylock(sem, reader);
if (wait) {
wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
__add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
}
spin_unlock_irq(&sem->waiters.lock);
while (wait) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!smp_load_acquire(&wq_entry.private))
break;
schedule();
}
__set_current_state(TASK_RUNNING);
}
bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
{
if (__percpu_down_read_trylock(sem))
return true;
if (try)
return false;
preempt_enable();
percpu_rwsem_wait(sem, /* .reader = */ true);
preempt_disable();
return true;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);
#define per_cpu_sum(var) \
({ \
@ -124,6 +192,8 @@ EXPORT_SYMBOL_GPL(__percpu_up_read);
* zero. If this sum is zero, then it is stable due to the fact that if any
* newly arriving readers increment a given counter, they will immediately
* decrement that same counter.
*
* Assumes sem->block is set.
*/
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
@ -142,32 +212,36 @@ static bool readers_active_check(struct percpu_rw_semaphore *sem)
void percpu_down_write(struct percpu_rw_semaphore *sem)
{
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
/* Notify readers to take the slow path. */
rcu_sync_enter(&sem->rss);
down_write(&sem->rw_sem);
/*
* Try set sem->block; this provides writer-writer exclusion.
* Having sem->block set makes new readers block.
*/
if (!__percpu_down_write_trylock(sem))
percpu_rwsem_wait(sem, /* .reader = */ false);
/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
/*
* Notify new readers to block; up until now, and thus throughout the
* longish rcu_sync_enter() above, new readers could still come in.
*/
WRITE_ONCE(sem->readers_block, 1);
smp_mb(); /* D matches A */
/*
* If they don't see our writer of readers_block, then we are
* guaranteed to see their sem->read_count increment, and therefore
* will wait for them.
* If they don't see our store of sem->block, then we are guaranteed to
* see their sem->read_count increment, and therefore will wait for
* them.
*/
/* Wait for all now active readers to complete. */
rcuwait_wait_event(&sem->writer, readers_active_check(sem));
/* Wait for all active readers to complete. */
rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL_GPL(percpu_down_write);
void percpu_up_write(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
/*
* Signal the writer is done, no fast path yet.
*
@ -178,12 +252,12 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
* Therefore we force it through the slow path which guarantees an
* acquire and thereby guarantees the critical section's consistency.
*/
smp_store_release(&sem->readers_block, 0);
atomic_set_release(&sem->block, 0);
/*
* Release the write lock, this will allow readers back in the game.
* Prod any pending reader/writer to make progress.
*/
up_write(&sem->rw_sem);
__wake_up(&sem->waiters, TASK_NORMAL, 1, sem);
/*
* Once this completes (at least one RCU-sched grace period hence) the

View file

@ -28,7 +28,6 @@
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include "rwsem.h"
#include "lock_events.h"
/*
@ -329,7 +328,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
* Make sure we are not reinitializing a held semaphore:
*/
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map(&sem->dep_map, name, key, 0);
lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
#ifdef CONFIG_DEBUG_RWSEMS
sem->magic = sem;
@ -660,8 +659,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
unsigned long flags;
bool ret = true;
BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE));
if (need_resched()) {
lockevent_inc(rwsem_opt_fail);
return false;
@ -1338,7 +1335,7 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
/*
* lock for reading
*/
inline void __down_read(struct rw_semaphore *sem)
static inline void __down_read(struct rw_semaphore *sem)
{
if (!rwsem_read_trylock(sem)) {
rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
@ -1426,7 +1423,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
/*
* unlock after reading
*/
inline void __up_read(struct rw_semaphore *sem)
static inline void __up_read(struct rw_semaphore *sem)
{
long tmp;

View file

@ -1,10 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __INTERNAL_RWSEM_H
#define __INTERNAL_RWSEM_H
#include <linux/rwsem.h>
extern void __down_read(struct rw_semaphore *sem);
extern void __up_read(struct rw_semaphore *sem);
#endif /* __INTERNAL_RWSEM_H */

View file

@ -14,14 +14,14 @@
#include <linux/export.h>
void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
struct lock_class_key *key)
struct lock_class_key *key, short inner)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map(&lock->dep_map, name, key, 0);
lockdep_init_map_wait(&lock->dep_map, name, key, 0, inner);
#endif
lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
lock->magic = SPINLOCK_MAGIC;
@ -39,7 +39,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map(&lock->dep_map, name, key, 0);
lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_CONFIG);
#endif
lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
lock->magic = RWLOCK_MAGIC;