Locking updates:

- Move futex code into kernel/futex/ and split up the kitchen sink into
    seperate files to make integration of sys_futex_waitv() simpler.
 
  - Add a new sys_futex_waitv() syscall which allows to wait on multiple
    futexes. The main use case is emulating Windows' WaitForMultipleObjects
    which allows Wine to improve the performance of Windows Games. Also
    native Linux games can benefit from this interface as this is a common
    wait pattern for this kind of applications.
 
  - Add context to ww_mutex_trylock() to provide a path for i915 to rework
    their eviction code step by step without making lockdep upset until the
    final steps of rework are completed. It's also useful for regulator and
    TTM to avoid dropping locks in the non contended path.
 
  - Lockdep and might_sleep() cleanups and improvements
 
  - A few improvements for the RT substitutions.
 
  - The usual small improvements and cleanups.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmF/FTITHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYoVNZD/9vIm3Bu1Coz8tbNXz58AiCYq9Y/vp5
 mzFgSzz+VJTkW5Vh8jo5Uel4rCKZyt+rL276EoaRPzYl8KFtWDbpK3qd3PrXKqTX
 At49JO4ttAMJUHIBQ6vblEkykmfEd9YPU1uSWk5roJ+s7Jmr5VWnu0FEWHP00As5
 tWOca/TM0ei9kof26V2fl5aecTGII4i4Zsvy+LPsXtI+TnmP0gSBcGAS/5UnZTtJ
 vQRWTR3ojoYvh5iTmNqbaURYoQLe2j8yscn1DSW1CABWVmP12eDWs+N7jRP4b5S9
 73xOv5P7vpva41wxrK2ir5iNkpsLE97VL2JOHTW8nm7orblfiuxHLTCkTjEdd2pO
 h8blI2IBizEB3JYn2BMkOAaZQOSjN8hd6Ye/b2B4AMEGWeXEoEv6eVy/orYKCluQ
 XDqGn47Vce/SYmo5vfTB8VMt6nANx8PKvOP3IvjHInYEQBgiT6QrlUw3RRkXBp5s
 clQkjYYwjAMVIXowcCrdhoKjMROzi6STShVwHwGL8MaZXqr8Vl6BUO9ckU0pY+4C
 F000Hzwxi8lGEQ9k+P+BnYOEzH5osCty8lloKiQ/7ciX6T+CZHGJPGK/iY4YL8P5
 C3CJWMsHCqST7DodNFJmdfZt99UfIMmEhshMDduU9AAH0tHCn8vOu0U6WvCtpyBp
 BvHj68zteAtlYg==
 =RZ4x
 -----END PGP SIGNATURE-----

Merge tag 'locking-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Thomas Gleixner:

 - Move futex code into kernel/futex/ and split up the kitchen sink into
   seperate files to make integration of sys_futex_waitv() simpler.

 - Add a new sys_futex_waitv() syscall which allows to wait on multiple
   futexes.

   The main use case is emulating Windows' WaitForMultipleObjects which
   allows Wine to improve the performance of Windows Games. Also native
   Linux games can benefit from this interface as this is a common wait
   pattern for this kind of applications.

 - Add context to ww_mutex_trylock() to provide a path for i915 to
   rework their eviction code step by step without making lockdep upset
   until the final steps of rework are completed. It's also useful for
   regulator and TTM to avoid dropping locks in the non contended path.

 - Lockdep and might_sleep() cleanups and improvements

 - A few improvements for the RT substitutions.

 - The usual small improvements and cleanups.

* tag 'locking-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  locking: Remove spin_lock_flags() etc
  locking/rwsem: Fix comments about reader optimistic lock stealing conditions
  locking: Remove rcu_read_{,un}lock() for preempt_{dis,en}able()
  locking/rwsem: Disable preemption for spinning region
  docs: futex: Fix kernel-doc references
  futex: Fix PREEMPT_RT build
  futex2: Documentation: Document sys_futex_waitv() uAPI
  selftests: futex: Test sys_futex_waitv() wouldblock
  selftests: futex: Test sys_futex_waitv() timeout
  selftests: futex: Add sys_futex_waitv() test
  futex,arm: Wire up sys_futex_waitv()
  futex,x86: Wire up sys_futex_waitv()
  futex: Implement sys_futex_waitv()
  futex: Simplify double_lock_hb()
  futex: Split out wait/wake
  futex: Split out requeue
  futex: Rename mark_wake_futex()
  futex: Rename: match_futex()
  futex: Rename: hb_waiter_{inc,dec,pending}()
  futex: Split out PI futex
  ...
This commit is contained in:
Linus Torvalds 2021-11-01 13:15:36 -07:00
commit 595b28fb0c
63 changed files with 5520 additions and 4552 deletions

View file

@ -47,8 +47,6 @@ extern int debug_locks_off(void);
# define locking_selftest() do { } while (0)
#endif
struct task_struct;
#ifdef CONFIG_LOCKDEP
extern void debug_show_all_locks(void);
extern void debug_show_held_locks(struct task_struct *task);

View file

@ -173,7 +173,7 @@ static inline int dma_resv_lock_slow_interruptible(struct dma_resv *obj,
*/
static inline bool __must_check dma_resv_trylock(struct dma_resv *obj)
{
return ww_mutex_trylock(&obj->lock);
return ww_mutex_trylock(&obj->lock, NULL);
}
/**

View file

@ -111,8 +111,8 @@ static __always_inline void might_resched(void)
#endif /* CONFIG_PREEMPT_* */
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
extern void ___might_sleep(const char *file, int line, int preempt_offset);
extern void __might_sleep(const char *file, int line, int preempt_offset);
extern void __might_resched(const char *file, int line, unsigned int offsets);
extern void __might_sleep(const char *file, int line);
extern void __cant_sleep(const char *file, int line, int preempt_offset);
extern void __cant_migrate(const char *file, int line);
@ -129,7 +129,7 @@ extern void __cant_migrate(const char *file, int line);
* supposed to.
*/
# define might_sleep() \
do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
/**
* cant_sleep - annotation for functions that cannot sleep
*
@ -168,10 +168,9 @@ extern void __cant_migrate(const char *file, int line);
*/
# define non_block_end() WARN_ON(current->non_block_count-- == 0)
#else
static inline void ___might_sleep(const char *file, int line,
int preempt_offset) { }
static inline void __might_sleep(const char *file, int line,
int preempt_offset) { }
static inline void __might_resched(const char *file, int line,
unsigned int offsets) { }
static inline void __might_sleep(const char *file, int line) { }
# define might_sleep() do { might_resched(); } while (0)
# define cant_sleep() do { } while (0)
# define cant_migrate() do { } while (0)

View file

@ -481,23 +481,6 @@ do { \
#endif /* CONFIG_LOCK_STAT */
#ifdef CONFIG_LOCKDEP
/*
* On lockdep we dont want the hand-coded irq-enable of
* _raw_*_lock_flags() code, because lockdep assumes
* that interrupts are not re-enabled during lock-acquire:
*/
#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \
LOCK_CONTENDED((_lock), (try), (lock))
#else /* CONFIG_LOCKDEP */
#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \
lockfl((_lock), (flags))
#endif /* CONFIG_LOCKDEP */
#ifdef CONFIG_PROVE_LOCKING
extern void print_irqtrace_events(struct task_struct *curr);
#else

View file

@ -21,7 +21,7 @@ enum lockdep_wait_type {
LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
LD_WAIT_CONFIG, /* preemptible in PREEMPT_RT, spinlock_t etc.. */
#else
LD_WAIT_CONFIG = LD_WAIT_SPIN,
#endif

View file

@ -122,9 +122,10 @@
* The preempt_count offset after spin_lock()
*/
#if !defined(CONFIG_PREEMPT_RT)
#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
#else
#define PREEMPT_LOCK_OFFSET 0
/* Locks on RT do not disable preemption */
#define PREEMPT_LOCK_OFFSET 0
#endif
/*

View file

@ -30,31 +30,16 @@ do { \
#ifdef CONFIG_DEBUG_SPINLOCK
extern void do_raw_read_lock(rwlock_t *lock) __acquires(lock);
#define do_raw_read_lock_flags(lock, flags) do_raw_read_lock(lock)
extern int do_raw_read_trylock(rwlock_t *lock);
extern void do_raw_read_unlock(rwlock_t *lock) __releases(lock);
extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock);
#define do_raw_write_lock_flags(lock, flags) do_raw_write_lock(lock)
extern int do_raw_write_trylock(rwlock_t *lock);
extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock);
#else
#ifndef arch_read_lock_flags
# define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#endif
#ifndef arch_write_lock_flags
# define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
#endif
# define do_raw_read_lock(rwlock) do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0)
# define do_raw_read_lock_flags(lock, flags) \
do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
# define do_raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock)
# define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
# define do_raw_write_lock(rwlock) do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0)
# define do_raw_write_lock_flags(lock, flags) \
do {__acquire(lock); arch_write_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
# define do_raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock)
# define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
#endif

View file

@ -157,8 +157,7 @@ static inline unsigned long __raw_read_lock_irqsave(rwlock_t *lock)
local_irq_save(flags);
preempt_disable();
rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED_FLAGS(lock, do_raw_read_trylock, do_raw_read_lock,
do_raw_read_lock_flags, &flags);
LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock);
return flags;
}
@ -184,8 +183,7 @@ static inline unsigned long __raw_write_lock_irqsave(rwlock_t *lock)
local_irq_save(flags);
preempt_disable();
rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED_FLAGS(lock, do_raw_write_trylock, do_raw_write_lock,
do_raw_write_lock_flags, &flags);
LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
return flags;
}

View file

@ -2037,7 +2037,7 @@ static inline int _cond_resched(void) { return 0; }
#endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
#define cond_resched() ({ \
___might_sleep(__FILE__, __LINE__, 0); \
__might_resched(__FILE__, __LINE__, 0); \
_cond_resched(); \
})
@ -2045,19 +2045,38 @@ extern int __cond_resched_lock(spinlock_t *lock);
extern int __cond_resched_rwlock_read(rwlock_t *lock);
extern int __cond_resched_rwlock_write(rwlock_t *lock);
#define cond_resched_lock(lock) ({ \
___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
__cond_resched_lock(lock); \
#define MIGHT_RESCHED_RCU_SHIFT 8
#define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
#ifndef CONFIG_PREEMPT_RT
/*
* Non RT kernels have an elevated preempt count due to the held lock,
* but are not allowed to be inside a RCU read side critical section
*/
# define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET
#else
/*
* spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
* cond_resched*lock() has to take that into account because it checks for
* preempt_count() and rcu_preempt_depth().
*/
# define PREEMPT_LOCK_RESCHED_OFFSETS \
(PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
#endif
#define cond_resched_lock(lock) ({ \
__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
__cond_resched_lock(lock); \
})
#define cond_resched_rwlock_read(lock) ({ \
__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
__cond_resched_rwlock_read(lock); \
#define cond_resched_rwlock_read(lock) ({ \
__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
__cond_resched_rwlock_read(lock); \
})
#define cond_resched_rwlock_write(lock) ({ \
__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
__cond_resched_rwlock_write(lock); \
#define cond_resched_rwlock_write(lock) ({ \
__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
__cond_resched_rwlock_write(lock); \
})
static inline void cond_resched_rcu(void)

View file

@ -177,7 +177,6 @@ do { \
#ifdef CONFIG_DEBUG_SPINLOCK
extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
#define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock)
extern int do_raw_spin_trylock(raw_spinlock_t *lock);
extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock);
#else
@ -188,18 +187,6 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
mmiowb_spin_lock();
}
#ifndef arch_spin_lock_flags
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
#endif
static inline void
do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock)
{
__acquire(lock);
arch_spin_lock_flags(&lock->raw_lock, *flags);
mmiowb_spin_lock();
}
static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
{
int ret = arch_spin_trylock(&(lock)->raw_lock);

View file

@ -108,16 +108,7 @@ static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
local_irq_save(flags);
preempt_disable();
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
/*
* On lockdep we dont want the hand-coded irq-enable of
* do_raw_spin_lock_flags() code, because lockdep assumes
* that interrupts are not re-enabled during lock-acquire:
*/
#ifdef CONFIG_LOCKDEP
LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
#else
do_raw_spin_lock_flags(lock, &flags);
#endif
return flags;
}

View file

@ -62,7 +62,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
#define arch_spin_is_locked(lock) ((void)(lock), 0)
/* for sched/core.c and kernel_lock.c: */
# define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0)
# define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0)
# define arch_spin_unlock(lock) do { barrier(); (void)(lock); } while (0)
# define arch_spin_trylock(lock) ({ barrier(); (void)(lock); 1; })
#endif /* DEBUG_SPINLOCK */

View file

@ -58,6 +58,7 @@ struct mq_attr;
struct compat_stat;
struct old_timeval32;
struct robust_list_head;
struct futex_waitv;
struct getcpu_cache;
struct old_linux_dirent;
struct perf_event_attr;
@ -610,7 +611,7 @@ asmlinkage long sys_waitid(int which, pid_t pid,
asmlinkage long sys_set_tid_address(int __user *tidptr);
asmlinkage long sys_unshare(unsigned long unshare_flags);
/* kernel/futex.c */
/* kernel/futex/syscalls.c */
asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
const struct __kernel_timespec __user *utime,
u32 __user *uaddr2, u32 val3);
@ -623,6 +624,10 @@ asmlinkage long sys_get_robust_list(int pid,
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
size_t len);
asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
unsigned int nr_futexes, unsigned int flags,
struct __kernel_timespec __user *timeout, clockid_t clockid);
/* kernel/hrtimer.c */
asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
struct __kernel_timespec __user *rmtp);

View file

@ -28,12 +28,10 @@
#ifndef CONFIG_PREEMPT_RT
#define WW_MUTEX_BASE mutex
#define ww_mutex_base_init(l,n,k) __mutex_init(l,n,k)
#define ww_mutex_base_trylock(l) mutex_trylock(l)
#define ww_mutex_base_is_locked(b) mutex_is_locked((b))
#else
#define WW_MUTEX_BASE rt_mutex
#define ww_mutex_base_init(l,n,k) __rt_mutex_init(l,n,k)
#define ww_mutex_base_trylock(l) rt_mutex_trylock(l)
#define ww_mutex_base_is_locked(b) rt_mutex_base_is_locked(&(b)->rtmutex)
#endif
@ -339,17 +337,8 @@ ww_mutex_lock_slow_interruptible(struct ww_mutex *lock,
extern void ww_mutex_unlock(struct ww_mutex *lock);
/**
* ww_mutex_trylock - tries to acquire the w/w mutex without acquire context
* @lock: mutex to lock
*
* Trylocks a mutex without acquire context, so no deadlock detection is
* possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise.
*/
static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock)
{
return ww_mutex_base_trylock(&lock->base);
}
extern int __must_check ww_mutex_trylock(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx);
/***
* ww_mutex_destroy - mark a w/w mutex unusable