Merge tag 'mm-nonmm-stable-2022-10-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull non-MM updates from Andrew Morton:

 - hfs and hfsplus kmap API modernization (Fabio Francesco)

 - make crash-kexec work properly when invoked from an NMI-time panic
   (Valentin Schneider)

 - ntfs bugfixes (Hawkins Jiawei)

 - improve IPC msg scalability by replacing atomic_t's with percpu
   counters (Jiebin Sun; a brief sketch of the counter API follows this
   list)

 - nilfs2 cleanups (Minghao Chi)

 - lots of other single patches all over the tree!
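
The percpu counter change trades a single contended atomic_t for
per-CPU deltas that are folded together only when an accurate total is
needed. A rough sketch of the percpu_counter API for this kind of
bookkeeping follows; the counter name and helpers are illustrative,
not the actual ipc/msg code:

    #include <linux/percpu_counter.h>

    static struct percpu_counter msg_bytes;     /* hypothetical counter */

    static int msg_counter_init(void)
    {
            /* allocates the per-CPU slots; may sleep */
            return percpu_counter_init(&msg_bytes, 0, GFP_KERNEL);
    }

    static void msg_account(size_t len)
    {
            /* hot path: touches only this CPU's slot */
            percpu_counter_add_local(&msg_bytes, len);
    }

    static s64 msg_total(void)
    {
            /* slow path: folds every CPU's delta into one value */
            return percpu_counter_sum(&msg_bytes);
    }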

* tag 'mm-nonmm-stable-2022-10-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (71 commits)
  include/linux/entry-common.h: remove has_signal comment of arch_do_signal_or_restart() prototype
  proc: test how it holds up with mapping'less process
  mailmap: update Frank Rowand email address
  ia64: mca: use strscpy() is more robust and safer
  init/Kconfig: fix unmet direct dependencies
  ia64: update config files
  nilfs2: replace WARN_ONs by nilfs_error for checkpoint acquisition failure
  fork: remove duplicate included header files
  init/main.c: remove unnecessary (void*) conversions
  proc: mark more files as permanent
  nilfs2: remove the unneeded result variable
  nilfs2: delete unnecessary checks before brelse()
  checkpatch: warn for non-standard fixes tag style
  usr/gen_init_cpio.c: remove unnecessary -1 values from int file
  ipc/msg: mitigate the lock contention with percpu counter
  percpu: add percpu_counter_add_local and percpu_counter_sub_local
  fs/ocfs2: fix repeated words in comments
  relay: use kvcalloc to alloc page array in relay_alloc_page_array
  proc: make config PROC_CHILDREN depend on PROC_FS
  fs: uninline inode_maybe_inc_iversion()
  ...
Committed by Linus Torvalds, 2022-10-12 11:00:22 -07:00, commit 676cb49573.
154 changed files with 1148 additions and 621 deletions.

kernel/exit.c:

@@ -184,6 +184,10 @@ void put_task_struct_rcu_user(struct task_struct *task)
call_rcu(&task->rcu, delayed_put_task_struct);
}
void __weak release_thread(struct task_struct *dead_task)
{
}
void release_task(struct task_struct *p)
{
struct task_struct *leader;

kernel/fail_function.c:

@@ -247,15 +247,11 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
/* cut off if it is too long */
if (count > KSYM_NAME_LEN)
count = KSYM_NAME_LEN;
buf = kmalloc(count + 1, GFP_KERNEL);
if (!buf)
return -ENOMEM;
if (copy_from_user(buf, buffer, count)) {
ret = -EFAULT;
goto out_free;
}
buf[count] = '\0';
buf = memdup_user_nul(buffer, count);
if (IS_ERR(buf))
return PTR_ERR(buf);
sym = strstrip(buf);
mutex_lock(&fei_lock);
@@ -298,17 +294,15 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
}
ret = register_kprobe(&attr->kp);
if (!ret)
fei_debugfs_add_attr(attr);
if (ret < 0)
fei_attr_remove(attr);
else {
list_add_tail(&attr->list, &fei_attr_list);
ret = count;
if (ret) {
fei_attr_free(attr);
goto out;
}
fei_debugfs_add_attr(attr);
list_add_tail(&attr->list, &fei_attr_list);
ret = count;
out:
mutex_unlock(&fei_lock);
out_free:
kfree(buf);
return ret;
}
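
The rewrite above collapses an open-coded kmalloc() + copy_from_user()
+ manual NUL-termination into memdup_user_nul(), which does all three
and returns an ERR_PTR on failure. The resulting idiom, distilled from
the hunk (store_sym() is a hypothetical wrapper, not code from this
commit):

    static ssize_t store_sym(const char __user *buffer, size_t count)
    {
            char *buf = memdup_user_nul(buffer, count); /* alloc + copy + '\0' */

            if (IS_ERR(buf))
                    return PTR_ERR(buf);
            /* ... parse buf ... */
            kfree(buf);
            return count;
    }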

kernel/fork.c:

@@ -97,7 +97,6 @@
#include <linux/scs.h>
#include <linux/io_uring.h>
#include <linux/bpf.h>
#include <linux/sched/mm.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>

kernel/kexec.c:

@@ -93,13 +93,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
/*
* Because we write directly to the reserved memory region when loading
* crash kernels we need a mutex here to prevent multiple crash kernels
* from attempting to load simultaneously, and to prevent a crash kernel
* from loading over the top of a in use crash kernel.
*
* KISS: always take the mutex.
* crash kernels we need a serialization here to prevent multiple crash
* kernels from attempting to load simultaneously.
*/
if (!mutex_trylock(&kexec_mutex))
if (!kexec_trylock())
return -EBUSY;
if (flags & KEXEC_ON_CRASH) {
@@ -165,7 +162,7 @@ out:
kimage_free(image);
out_unlock:
mutex_unlock(&kexec_mutex);
kexec_unlock();
return ret;
}

kernel/kexec_core.c:

@@ -46,7 +46,7 @@
#include <crypto/hash.h>
#include "kexec_internal.h"
DEFINE_MUTEX(kexec_mutex);
atomic_t __kexec_lock = ATOMIC_INIT(0);
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;
@@ -809,7 +809,7 @@ static int kimage_load_normal_segment(struct kimage *image,
if (result < 0)
goto out;
ptr = kmap(page);
ptr = kmap_local_page(page);
/* Start with a clear page */
clear_page(ptr);
ptr += maddr & ~PAGE_MASK;
@@ -822,7 +822,7 @@ static int kimage_load_normal_segment(struct kimage *image,
memcpy(ptr, kbuf, uchunk);
else
result = copy_from_user(ptr, buf, uchunk);
kunmap(page);
kunmap_local(ptr);
if (result) {
result = -EFAULT;
goto out;
@@ -873,7 +873,7 @@ static int kimage_load_crash_segment(struct kimage *image,
goto out;
}
arch_kexec_post_alloc_pages(page_address(page), 1, 0);
ptr = kmap(page);
ptr = kmap_local_page(page);
ptr += maddr & ~PAGE_MASK;
mchunk = min_t(size_t, mbytes,
PAGE_SIZE - (maddr & ~PAGE_MASK));
@@ -889,7 +889,7 @@ static int kimage_load_crash_segment(struct kimage *image,
else
result = copy_from_user(ptr, buf, uchunk);
kexec_flush_icache_page(page);
kunmap(page);
kunmap_local(ptr);
arch_kexec_pre_free_pages(page_address(page), 1);
if (result) {
result = -EFAULT;
@@ -959,7 +959,7 @@ late_initcall(kexec_core_sysctl_init);
*/
void __noclone __crash_kexec(struct pt_regs *regs)
{
/* Take the kexec_mutex here to prevent sys_kexec_load
/* Take the kexec_lock here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
* we are using after a panic on a different cpu.
*
@@ -967,7 +967,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
* of memory the xchg(&kexec_crash_image) would be
* sufficient. But since I reuse the memory...
*/
if (mutex_trylock(&kexec_mutex)) {
if (kexec_trylock()) {
if (kexec_crash_image) {
struct pt_regs fixed_regs;
@@ -976,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
machine_crash_shutdown(&fixed_regs);
machine_kexec(kexec_crash_image);
}
mutex_unlock(&kexec_mutex);
kexec_unlock();
}
}
STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -1004,14 +1004,17 @@ void crash_kexec(struct pt_regs *regs)
}
}
size_t crash_get_memory_size(void)
ssize_t crash_get_memory_size(void)
{
size_t size = 0;
ssize_t size = 0;
if (!kexec_trylock())
return -EBUSY;
mutex_lock(&kexec_mutex);
if (crashk_res.end != crashk_res.start)
size = resource_size(&crashk_res);
mutex_unlock(&kexec_mutex);
kexec_unlock();
return size;
}
@@ -1022,7 +1025,8 @@ int crash_shrink_memory(unsigned long new_size)
unsigned long old_size;
struct resource *ram_res;
mutex_lock(&kexec_mutex);
if (!kexec_trylock())
return -EBUSY;
if (kexec_crash_image) {
ret = -ENOENT;
@@ -1060,7 +1064,7 @@ int crash_shrink_memory(unsigned long new_size)
insert_resource(&iomem_resource, ram_res);
unlock:
mutex_unlock(&kexec_mutex);
kexec_unlock();
return ret;
}
@@ -1132,7 +1136,7 @@ int kernel_kexec(void)
{
int error = 0;
if (!mutex_trylock(&kexec_mutex))
if (!kexec_trylock())
return -EBUSY;
if (!kexec_image) {
error = -EINVAL;
@@ -1208,6 +1212,6 @@ int kernel_kexec(void)
#endif
Unlock:
mutex_unlock(&kexec_mutex);
kexec_unlock();
return error;
}
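
The kmap()/kunmap() to kmap_local_page()/kunmap_local() changes in
this file are part of the kmap API modernization called out in the
pull summary: local mappings are per-thread, may nest, and are torn
down by the address that was returned rather than by page. A minimal
sketch of the pattern (copy_into_page() is a hypothetical helper, not
code from this diff):

    #include <linux/highmem.h>

    /* assumes len <= PAGE_SIZE */
    static void copy_into_page(struct page *page, const void *src, size_t len)
    {
            void *ptr = kmap_local_page(page);      /* CPU-local mapping */

            memcpy(ptr, src, len);
            kunmap_local(ptr);              /* unmap by returned address */
    }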

kernel/kexec_file.c:

@@ -339,7 +339,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
image = NULL;
if (!mutex_trylock(&kexec_mutex))
if (!kexec_trylock())
return -EBUSY;
dest_image = &kexec_image;
@@ -411,7 +411,7 @@ out:
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
arch_kexec_protect_crashkres();
mutex_unlock(&kexec_mutex);
kexec_unlock();
kimage_free(image);
return ret;
}

kernel/kexec_internal.h:

@@ -13,7 +13,20 @@ void kimage_terminate(struct kimage *image);
int kimage_is_destination_range(struct kimage *image,
unsigned long start, unsigned long end);
extern struct mutex kexec_mutex;
/*
* Whatever is used to serialize accesses to the kexec_crash_image needs to be
* NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
* "simple" atomic variable that is acquired with a cmpxchg().
*/
extern atomic_t __kexec_lock;
static inline bool kexec_trylock(void)
{
return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
}
static inline void kexec_unlock(void)
{
atomic_set_release(&__kexec_lock, 0);
}
#ifdef CONFIG_KEXEC_FILE
#include <linux/purgatory.h>
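
As the kexec.c and kexec_core.c hunks show, this lock is only ever
acquired opportunistically and never waited on, which is what makes it
usable from an NMI-time panic. Every call site follows the same shape:

    if (!kexec_trylock())
            return -EBUSY;
    /* ... operate on kexec_image / kexec_crash_image ... */
    kexec_unlock();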

kernel/ksysfs.c:

@@ -105,7 +105,12 @@ KERNEL_ATTR_RO(kexec_crash_loaded);
static ssize_t kexec_crash_size_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%zu\n", crash_get_memory_size());
ssize_t size = crash_get_memory_size();
if (size < 0)
return size;
return sprintf(buf, "%zd\n", size);
}
static ssize_t kexec_crash_size_store(struct kobject *kobj,
struct kobj_attribute *attr,

kernel/latencytop.c:

@@ -112,7 +112,7 @@ static void __sched
account_global_scheduler_latency(struct task_struct *tsk,
struct latency_record *lat)
{
int firstnonnull = MAXLR + 1;
int firstnonnull = MAXLR;
int i;
/* skip kernel threads for now */
@@ -150,7 +150,7 @@ account_global_scheduler_latency(struct task_struct *tsk,
}
i = firstnonnull;
if (i >= MAXLR - 1)
if (i >= MAXLR)
return;
/* Allocted a new one: */

kernel/profile.c:

@@ -59,43 +59,39 @@ int profile_setup(char *str)
static const char schedstr[] = "schedule";
static const char sleepstr[] = "sleep";
static const char kvmstr[] = "kvm";
const char *select = NULL;
int par;
if (!strncmp(str, sleepstr, strlen(sleepstr))) {
#ifdef CONFIG_SCHEDSTATS
force_schedstat_enabled();
prof_on = SLEEP_PROFILING;
if (str[strlen(sleepstr)] == ',')
str += strlen(sleepstr) + 1;
if (get_option(&str, &par))
prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
pr_info("kernel sleep profiling enabled (shift: %u)\n",
prof_shift);
select = sleepstr;
#else
pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
#endif /* CONFIG_SCHEDSTATS */
} else if (!strncmp(str, schedstr, strlen(schedstr))) {
prof_on = SCHED_PROFILING;
if (str[strlen(schedstr)] == ',')
str += strlen(schedstr) + 1;
if (get_option(&str, &par))
prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
pr_info("kernel schedule profiling enabled (shift: %u)\n",
prof_shift);
select = schedstr;
} else if (!strncmp(str, kvmstr, strlen(kvmstr))) {
prof_on = KVM_PROFILING;
if (str[strlen(kvmstr)] == ',')
str += strlen(kvmstr) + 1;
if (get_option(&str, &par))
prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
pr_info("kernel KVM profiling enabled (shift: %u)\n",
prof_shift);
select = kvmstr;
} else if (get_option(&str, &par)) {
prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
prof_on = CPU_PROFILING;
pr_info("kernel profiling enabled (shift: %u)\n",
prof_shift);
}
if (select) {
if (str[strlen(select)] == ',')
str += strlen(select) + 1;
if (get_option(&str, &par))
prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
pr_info("kernel %s profiling enabled (shift: %u)\n",
select, prof_shift);
}
return 1;
}
__setup("profile=", profile_setup);

kernel/relay.c:

@@ -60,10 +60,7 @@ static const struct vm_operations_struct relay_file_mmap_ops = {
*/
static struct page **relay_alloc_page_array(unsigned int n_pages)
{
const size_t pa_size = n_pages * sizeof(struct page *);
if (pa_size > PAGE_SIZE)
return vzalloc(pa_size);
return kzalloc(pa_size, GFP_KERNEL);
return kvcalloc(n_pages, sizeof(struct page *), GFP_KERNEL);
}
/*

kernel/smpboot.c:

@@ -433,7 +433,7 @@ bool cpu_wait_death(unsigned int cpu, int seconds)
/* The outgoing CPU will normally get done quite quickly. */
if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD)
goto update_state;
goto update_state_early;
udelay(5);
/* But if the outgoing CPU dawdles, wait increasingly long times. */
@@ -444,16 +444,17 @@ bool cpu_wait_death(unsigned int cpu, int seconds)
break;
sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10);
}
update_state:
update_state_early:
oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
update_state:
if (oldstate == CPU_DEAD) {
/* Outgoing CPU died normally, update state. */
smp_mb(); /* atomic_read() before update. */
atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD);
} else {
/* Outgoing CPU still hasn't died, set state accordingly. */
if (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
oldstate, CPU_BROKEN) != oldstate)
if (!atomic_try_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
&oldstate, CPU_BROKEN))
goto update_state;
ret = false;
}
@@ -475,14 +476,14 @@ bool cpu_report_death(void)
int newstate;
int cpu = smp_processor_id();
oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
do {
oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
if (oldstate != CPU_BROKEN)
newstate = CPU_DEAD;
else
newstate = CPU_DEAD_FROZEN;
} while (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
oldstate, newstate) != oldstate);
} while (!atomic_try_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
&oldstate, newstate));
return newstate == CPU_DEAD;
}

kernel/task_work.c:

@@ -47,12 +47,12 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
/* record the work call stack in order to print it in KASAN reports */
kasan_record_aux_stack(work);
head = READ_ONCE(task->task_works);
do {
head = READ_ONCE(task->task_works);
if (unlikely(head == &work_exited))
return -ESRCH;
work->next = head;
} while (cmpxchg(&task->task_works, head, work) != head);
} while (!try_cmpxchg(&task->task_works, &head, work));
switch (notify) {
case TWA_NONE:
@@ -100,10 +100,12 @@ task_work_cancel_match(struct task_struct *task,
* we raced with task_work_run(), *pprev == NULL/exited.
*/
raw_spin_lock_irqsave(&task->pi_lock, flags);
while ((work = READ_ONCE(*pprev))) {
if (!match(work, data))
work = READ_ONCE(*pprev);
while (work) {
if (!match(work, data)) {
pprev = &work->next;
else if (cmpxchg(pprev, work, work->next) == work)
work = READ_ONCE(*pprev);
} else if (try_cmpxchg(pprev, &work, work->next))
break;
}
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
@@ -151,16 +153,16 @@ void task_work_run(void)
* work->func() can do task_work_add(), do not set
* work_exited unless the list is empty.
*/
work = READ_ONCE(task->task_works);
do {
head = NULL;
work = READ_ONCE(task->task_works);
if (!work) {
if (task->flags & PF_EXITING)
head = &work_exited;
else
break;
}
} while (cmpxchg(&task->task_works, work, head) != work);
} while (!try_cmpxchg(&task->task_works, &work, head));
if (!work)
break;

kernel/utsname_sysctl.c:

@@ -75,6 +75,13 @@ static DEFINE_CTL_TABLE_POLL(hostname_poll);
static DEFINE_CTL_TABLE_POLL(domainname_poll);
static struct ctl_table uts_kern_table[] = {
{
.procname = "arch",
.data = init_uts_ns.name.machine,
.maxlen = sizeof(init_uts_ns.name.machine),
.mode = 0444,
.proc_handler = proc_do_uts_string,
},
{
.procname = "ostype",
.data = init_uts_ns.name.sysname,