Merge branch 'tip/tracing/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into tracing/urgent

This commit is contained in:
Ingo Molnar 2010-03-04 11:51:29 +01:00
commit e02c4fd314
4418 changed files with 286315 additions and 111802 deletions

View file

@ -100,6 +100,7 @@ obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
obj-$(CONFIG_PADATA) += padata.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is

View file

@ -23,6 +23,7 @@
*/
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/fs.h>
@ -166,6 +167,20 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
*/
static int need_forkexit_callback __read_mostly;
#ifdef CONFIG_PROVE_LOCKING
int cgroup_lock_is_held(void)
{
return lockdep_is_held(&cgroup_mutex);
}
#else /* #ifdef CONFIG_PROVE_LOCKING */
int cgroup_lock_is_held(void)
{
return mutex_is_locked(&cgroup_mutex);
}
#endif /* #else #ifdef CONFIG_PROVE_LOCKING */
EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
/* convenient tests for these bits */
inline int cgroup_is_removed(const struct cgroup *cgrp)
{

View file

@ -85,7 +85,9 @@ static void __exit_signal(struct task_struct *tsk)
BUG_ON(!sig);
BUG_ON(!atomic_read(&sig->count));
sighand = rcu_dereference(tsk->sighand);
sighand = rcu_dereference_check(tsk->sighand,
rcu_read_lock_held() ||
lockdep_is_held(&tasklist_lock));
spin_lock(&sighand->siglock);
posix_cpu_timers_exit(tsk);
@ -170,8 +172,10 @@ void release_task(struct task_struct * p)
repeat:
tracehook_prepare_release_task(p);
/* don't need to get the RCU readlock here - the process is dead and
* can't be modifying its own credentials */
* can't be modifying its own credentials. But shut RCU-lockdep up */
rcu_read_lock();
atomic_dec(&__task_cred(p)->user->processes);
rcu_read_unlock();
proc_flush_task(p);
@ -473,9 +477,11 @@ static void close_files(struct files_struct * files)
/*
* It is safe to dereference the fd table without RCU or
* ->file_lock because this is the last reference to the
* files structure.
* files structure. But use RCU to shut RCU-lockdep up.
*/
rcu_read_lock();
fdt = files_fdtable(files);
rcu_read_unlock();
for (;;) {
unsigned long set;
i = j * __NFDBITS;
@ -521,10 +527,12 @@ void put_files_struct(struct files_struct *files)
* at the end of the RCU grace period. Otherwise,
* you can free files immediately.
*/
rcu_read_lock();
fdt = files_fdtable(files);
if (fdt != &files->fdtab)
kmem_cache_free(files_cachep, files);
free_fdtable(fdt);
rcu_read_unlock();
}
}

View file

@ -86,6 +86,7 @@ int max_threads; /* tunable limit on nr_threads */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
EXPORT_SYMBOL_GPL(tasklist_lock);
int nr_processes(void)
{

View file

@ -146,7 +146,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
struct task_struct *p;
ret = -ESRCH;
read_lock(&tasklist_lock);
rcu_read_lock();
p = find_task_by_vpid(pid);
if (!p)
goto err_unlock;
@ -157,7 +157,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
!capable(CAP_SYS_PTRACE))
goto err_unlock;
head = p->compat_robust_list;
read_unlock(&tasklist_lock);
rcu_read_unlock();
}
if (put_user(sizeof(*head), len_ptr))
@ -165,7 +165,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
return put_user(ptr_to_compat(head), head_ptr);
err_unlock:
read_unlock(&tasklist_lock);
rcu_read_unlock();
return ret;
}

View file

@ -44,6 +44,7 @@
#include <linux/debugfs.h>
#include <linux/kdebug.h>
#include <linux/memory.h>
#include <linux/ftrace.h>
#include <asm-generic/sections.h>
#include <asm/cacheflush.h>
@ -93,6 +94,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = {
{"native_get_debugreg",},
{"irq_entries_start",},
{"common_interrupt",},
{"mcount",}, /* mcount can be called from everywhere */
{NULL} /* Terminator */
};
@ -124,30 +126,6 @@ static LIST_HEAD(kprobe_insn_pages);
static int kprobe_garbage_slots;
static int collect_garbage_slots(void);
static int __kprobes check_safety(void)
{
int ret = 0;
#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER)
ret = freeze_processes();
if (ret == 0) {
struct task_struct *p, *q;
do_each_thread(p, q) {
if (p != current && p->state == TASK_RUNNING &&
p->pid != 0) {
printk("Check failed: %s is running\n",p->comm);
ret = -1;
goto loop_end;
}
} while_each_thread(p, q);
}
loop_end:
thaw_processes();
#else
synchronize_sched();
#endif
return ret;
}
/**
* __get_insn_slot() - Find a slot on an executable page for an instruction.
* We allocate an executable page if there's no room on existing ones.
@ -235,9 +213,8 @@ static int __kprobes collect_garbage_slots(void)
{
struct kprobe_insn_page *kip, *next;
/* Ensure no-one is preepmted on the garbages */
if (check_safety())
return -EAGAIN;
/* Ensure no-one is interrupted on the garbages */
synchronize_sched();
list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) {
int i;
@ -728,7 +705,8 @@ int __kprobes register_kprobe(struct kprobe *p)
preempt_disable();
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr)) {
in_kprobes_functions((unsigned long) p->addr) ||
ftrace_text_reserved(p->addr, p->addr)) {
preempt_enable();
return -EINVAL;
}

View file

@ -197,16 +197,8 @@ static int __init ksysfs_init(void)
goto group_exit;
}
/* create the /sys/kernel/uids/ directory */
error = uids_sysfs_init();
if (error)
goto notes_exit;
return 0;
notes_exit:
if (notes_size > 0)
sysfs_remove_bin_file(kernel_kobj, &notes_attr);
group_exit:
sysfs_remove_group(kernel_kobj, &kernel_attr_group);
kset_exit:

View file

@ -101,7 +101,7 @@ static void create_kthread(struct kthread_create_info *create)
*
* Description: This helper function creates and names a kernel
* thread. The thread will be stopped: use wake_up_process() to start
* it. See also kthread_run(), kthread_create_on_cpu().
* it. See also kthread_run().
*
* When woken, the thread will run @threadfn() with @data as its
* argument. @threadfn() can either call do_exit() directly if it is a

View file

@ -3809,3 +3809,21 @@ void lockdep_sys_exit(void)
lockdep_print_held_locks(curr);
}
}
void lockdep_rcu_dereference(const char *file, const int line)
{
struct task_struct *curr = current;
if (!debug_locks_off())
return;
printk("\n===================================================\n");
printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n");
printk( "---------------------------------------------------\n");
printk("%s:%d invoked rcu_dereference_check() without protection!\n",
file, line);
printk("\nother info that might help us debug this:\n\n");
lockdep_print_held_locks(curr);
printk("\nstack backtrace:\n");
dump_stack();
}
EXPORT_SYMBOL_GPL(lockdep_rcu_dereference);

View file

@ -78,10 +78,10 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
int ret = NOTIFY_DONE;
struct notifier_block *nb, *next_nb;
nb = rcu_dereference(*nl);
nb = rcu_dereference_raw(*nl);
while (nb && nr_to_call) {
next_nb = rcu_dereference(nb->next);
next_nb = rcu_dereference_raw(nb->next);
#ifdef CONFIG_DEBUG_NOTIFIERS
if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
@ -309,7 +309,7 @@ int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
* racy then it does not matter what the result of the test
* is, we re-check the list after having taken the lock anyway:
*/
if (rcu_dereference(nh->head)) {
if (rcu_dereference_raw(nh->head)) {
down_read(&nh->rwsem);
ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
nr_calls);

690
kernel/padata.c Normal file
View file

@ -0,0 +1,690 @@
/*
* padata.c - generic interface to process data streams in parallel
*
* Copyright (C) 2008, 2009 secunet Security Networks AG
* Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/cpu.h>
#include <linux/padata.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#define MAX_SEQ_NR INT_MAX - NR_CPUS
#define MAX_OBJ_NUM 10000 * NR_CPUS
static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
{
int cpu, target_cpu;
target_cpu = cpumask_first(pd->cpumask);
for (cpu = 0; cpu < cpu_index; cpu++)
target_cpu = cpumask_next(target_cpu, pd->cpumask);
return target_cpu;
}
static int padata_cpu_hash(struct padata_priv *padata)
{
int cpu_index;
struct parallel_data *pd;
pd = padata->pd;
/*
* Hash the sequence numbers to the cpus by taking
* seq_nr mod. number of cpus in use.
*/
cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask);
return padata_index_to_cpu(pd, cpu_index);
}
static void padata_parallel_worker(struct work_struct *work)
{
struct padata_queue *queue;
struct parallel_data *pd;
struct padata_instance *pinst;
LIST_HEAD(local_list);
local_bh_disable();
queue = container_of(work, struct padata_queue, pwork);
pd = queue->pd;
pinst = pd->pinst;
spin_lock(&queue->parallel.lock);
list_replace_init(&queue->parallel.list, &local_list);
spin_unlock(&queue->parallel.lock);
while (!list_empty(&local_list)) {
struct padata_priv *padata;
padata = list_entry(local_list.next,
struct padata_priv, list);
list_del_init(&padata->list);
padata->parallel(padata);
}
local_bh_enable();
}
/*
* padata_do_parallel - padata parallelization function
*
* @pinst: padata instance
* @padata: object to be parallelized
* @cb_cpu: cpu the serialization callback function will run on,
* must be in the cpumask of padata.
*
* The parallelization callback function will run with BHs off.
* Note: Every object which is parallelized by padata_do_parallel
* must be seen by padata_do_serial.
*/
int padata_do_parallel(struct padata_instance *pinst,
struct padata_priv *padata, int cb_cpu)
{
int target_cpu, err;
struct padata_queue *queue;
struct parallel_data *pd;
rcu_read_lock_bh();
pd = rcu_dereference(pinst->pd);
err = 0;
if (!(pinst->flags & PADATA_INIT))
goto out;
err = -EBUSY;
if ((pinst->flags & PADATA_RESET))
goto out;
if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
goto out;
err = -EINVAL;
if (!cpumask_test_cpu(cb_cpu, pd->cpumask))
goto out;
err = -EINPROGRESS;
atomic_inc(&pd->refcnt);
padata->pd = pd;
padata->cb_cpu = cb_cpu;
if (unlikely(atomic_read(&pd->seq_nr) == pd->max_seq_nr))
atomic_set(&pd->seq_nr, -1);
padata->seq_nr = atomic_inc_return(&pd->seq_nr);
target_cpu = padata_cpu_hash(padata);
queue = per_cpu_ptr(pd->queue, target_cpu);
spin_lock(&queue->parallel.lock);
list_add_tail(&padata->list, &queue->parallel.list);
spin_unlock(&queue->parallel.lock);
queue_work_on(target_cpu, pinst->wq, &queue->pwork);
out:
rcu_read_unlock_bh();
return err;
}
EXPORT_SYMBOL(padata_do_parallel);
static struct padata_priv *padata_get_next(struct parallel_data *pd)
{
int cpu, num_cpus, empty, calc_seq_nr;
int seq_nr, next_nr, overrun, next_overrun;
struct padata_queue *queue, *next_queue;
struct padata_priv *padata;
struct padata_list *reorder;
empty = 0;
next_nr = -1;
next_overrun = 0;
next_queue = NULL;
num_cpus = cpumask_weight(pd->cpumask);
for_each_cpu(cpu, pd->cpumask) {
queue = per_cpu_ptr(pd->queue, cpu);
reorder = &queue->reorder;
/*
* Calculate the seq_nr of the object that should be
* next in this queue.
*/
overrun = 0;
calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
+ queue->cpu_index;
if (unlikely(calc_seq_nr > pd->max_seq_nr)) {
calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1;
overrun = 1;
}
if (!list_empty(&reorder->list)) {
padata = list_entry(reorder->list.next,
struct padata_priv, list);
seq_nr = padata->seq_nr;
BUG_ON(calc_seq_nr != seq_nr);
} else {
seq_nr = calc_seq_nr;
empty++;
}
if (next_nr < 0 || seq_nr < next_nr
|| (next_overrun && !overrun)) {
next_nr = seq_nr;
next_overrun = overrun;
next_queue = queue;
}
}
padata = NULL;
if (empty == num_cpus)
goto out;
reorder = &next_queue->reorder;
if (!list_empty(&reorder->list)) {
padata = list_entry(reorder->list.next,
struct padata_priv, list);
if (unlikely(next_overrun)) {
for_each_cpu(cpu, pd->cpumask) {
queue = per_cpu_ptr(pd->queue, cpu);
atomic_set(&queue->num_obj, 0);
}
}
spin_lock(&reorder->lock);
list_del_init(&padata->list);
atomic_dec(&pd->reorder_objects);
spin_unlock(&reorder->lock);
atomic_inc(&next_queue->num_obj);
goto out;
}
if (next_nr % num_cpus == next_queue->cpu_index) {
padata = ERR_PTR(-ENODATA);
goto out;
}
padata = ERR_PTR(-EINPROGRESS);
out:
return padata;
}
static void padata_reorder(struct parallel_data *pd)
{
struct padata_priv *padata;
struct padata_queue *queue;
struct padata_instance *pinst = pd->pinst;
try_again:
if (!spin_trylock_bh(&pd->lock))
goto out;
while (1) {
padata = padata_get_next(pd);
if (!padata || PTR_ERR(padata) == -EINPROGRESS)
break;
if (PTR_ERR(padata) == -ENODATA) {
spin_unlock_bh(&pd->lock);
goto out;
}
queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
spin_lock(&queue->serial.lock);
list_add_tail(&padata->list, &queue->serial.list);
spin_unlock(&queue->serial.lock);
queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork);
}
spin_unlock_bh(&pd->lock);
if (atomic_read(&pd->reorder_objects))
goto try_again;
out:
return;
}
static void padata_serial_worker(struct work_struct *work)
{
struct padata_queue *queue;
struct parallel_data *pd;
LIST_HEAD(local_list);
local_bh_disable();
queue = container_of(work, struct padata_queue, swork);
pd = queue->pd;
spin_lock(&queue->serial.lock);
list_replace_init(&queue->serial.list, &local_list);
spin_unlock(&queue->serial.lock);
while (!list_empty(&local_list)) {
struct padata_priv *padata;
padata = list_entry(local_list.next,
struct padata_priv, list);
list_del_init(&padata->list);
padata->serial(padata);
atomic_dec(&pd->refcnt);
}
local_bh_enable();
}
/*
* padata_do_serial - padata serialization function
*
* @padata: object to be serialized.
*
* padata_do_serial must be called for every parallelized object.
* The serialization callback function will run with BHs off.
*/
void padata_do_serial(struct padata_priv *padata)
{
int cpu;
struct padata_queue *queue;
struct parallel_data *pd;
pd = padata->pd;
cpu = get_cpu();
queue = per_cpu_ptr(pd->queue, cpu);
spin_lock(&queue->reorder.lock);
atomic_inc(&pd->reorder_objects);
list_add_tail(&padata->list, &queue->reorder.list);
spin_unlock(&queue->reorder.lock);
put_cpu();
padata_reorder(pd);
}
EXPORT_SYMBOL(padata_do_serial);
static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
const struct cpumask *cpumask)
{
int cpu, cpu_index, num_cpus;
struct padata_queue *queue;
struct parallel_data *pd;
cpu_index = 0;
pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
if (!pd)
goto err;
pd->queue = alloc_percpu(struct padata_queue);
if (!pd->queue)
goto err_free_pd;
if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
goto err_free_queue;
for_each_possible_cpu(cpu) {
queue = per_cpu_ptr(pd->queue, cpu);
queue->pd = pd;
if (cpumask_test_cpu(cpu, cpumask)
&& cpumask_test_cpu(cpu, cpu_active_mask)) {
queue->cpu_index = cpu_index;
cpu_index++;
} else
queue->cpu_index = -1;
INIT_LIST_HEAD(&queue->reorder.list);
INIT_LIST_HEAD(&queue->parallel.list);
INIT_LIST_HEAD(&queue->serial.list);
spin_lock_init(&queue->reorder.lock);
spin_lock_init(&queue->parallel.lock);
spin_lock_init(&queue->serial.lock);
INIT_WORK(&queue->pwork, padata_parallel_worker);
INIT_WORK(&queue->swork, padata_serial_worker);
atomic_set(&queue->num_obj, 0);
}
cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
num_cpus = cpumask_weight(pd->cpumask);
pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
atomic_set(&pd->refcnt, 0);
pd->pinst = pinst;
spin_lock_init(&pd->lock);
return pd;
err_free_queue:
free_percpu(pd->queue);
err_free_pd:
kfree(pd);
err:
return NULL;
}
static void padata_free_pd(struct parallel_data *pd)
{
free_cpumask_var(pd->cpumask);
free_percpu(pd->queue);
kfree(pd);
}
static void padata_replace(struct padata_instance *pinst,
struct parallel_data *pd_new)
{
struct parallel_data *pd_old = pinst->pd;
pinst->flags |= PADATA_RESET;
rcu_assign_pointer(pinst->pd, pd_new);
synchronize_rcu();
while (atomic_read(&pd_old->refcnt) != 0)
yield();
flush_workqueue(pinst->wq);
padata_free_pd(pd_old);
pinst->flags &= ~PADATA_RESET;
}
/*
* padata_set_cpumask - set the cpumask that padata should use
*
* @pinst: padata instance
* @cpumask: the cpumask to use
*/
int padata_set_cpumask(struct padata_instance *pinst,
cpumask_var_t cpumask)
{
struct parallel_data *pd;
int err = 0;
might_sleep();
mutex_lock(&pinst->lock);
pd = padata_alloc_pd(pinst, cpumask);
if (!pd) {
err = -ENOMEM;
goto out;
}
cpumask_copy(pinst->cpumask, cpumask);
padata_replace(pinst, pd);
out:
mutex_unlock(&pinst->lock);
return err;
}
EXPORT_SYMBOL(padata_set_cpumask);
static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
{
struct parallel_data *pd;
if (cpumask_test_cpu(cpu, cpu_active_mask)) {
pd = padata_alloc_pd(pinst, pinst->cpumask);
if (!pd)
return -ENOMEM;
padata_replace(pinst, pd);
}
return 0;
}
/*
* padata_add_cpu - add a cpu to the padata cpumask
*
* @pinst: padata instance
* @cpu: cpu to add
*/
int padata_add_cpu(struct padata_instance *pinst, int cpu)
{
int err;
might_sleep();
mutex_lock(&pinst->lock);
cpumask_set_cpu(cpu, pinst->cpumask);
err = __padata_add_cpu(pinst, cpu);
mutex_unlock(&pinst->lock);
return err;
}
EXPORT_SYMBOL(padata_add_cpu);
static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
{
struct parallel_data *pd;
if (cpumask_test_cpu(cpu, cpu_online_mask)) {
pd = padata_alloc_pd(pinst, pinst->cpumask);
if (!pd)
return -ENOMEM;
padata_replace(pinst, pd);
}
return 0;
}
/*
* padata_remove_cpu - remove a cpu from the padata cpumask
*
* @pinst: padata instance
* @cpu: cpu to remove
*/
int padata_remove_cpu(struct padata_instance *pinst, int cpu)
{
int err;
might_sleep();
mutex_lock(&pinst->lock);
cpumask_clear_cpu(cpu, pinst->cpumask);
err = __padata_remove_cpu(pinst, cpu);
mutex_unlock(&pinst->lock);
return err;
}
EXPORT_SYMBOL(padata_remove_cpu);
/*
* padata_start - start the parallel processing
*
* @pinst: padata instance to start
*/
void padata_start(struct padata_instance *pinst)
{
might_sleep();
mutex_lock(&pinst->lock);
pinst->flags |= PADATA_INIT;
mutex_unlock(&pinst->lock);
}
EXPORT_SYMBOL(padata_start);
/*
* padata_stop - stop the parallel processing
*
* @pinst: padata instance to stop
*/
void padata_stop(struct padata_instance *pinst)
{
might_sleep();
mutex_lock(&pinst->lock);
pinst->flags &= ~PADATA_INIT;
mutex_unlock(&pinst->lock);
}
EXPORT_SYMBOL(padata_stop);
static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
int err;
struct padata_instance *pinst;
int cpu = (unsigned long)hcpu;
pinst = container_of(nfb, struct padata_instance, cpu_notifier);
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
if (!cpumask_test_cpu(cpu, pinst->cpumask))
break;
mutex_lock(&pinst->lock);
err = __padata_add_cpu(pinst, cpu);
mutex_unlock(&pinst->lock);
if (err)
return NOTIFY_BAD;
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
if (!cpumask_test_cpu(cpu, pinst->cpumask))
break;
mutex_lock(&pinst->lock);
err = __padata_remove_cpu(pinst, cpu);
mutex_unlock(&pinst->lock);
if (err)
return NOTIFY_BAD;
break;
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
if (!cpumask_test_cpu(cpu, pinst->cpumask))
break;
mutex_lock(&pinst->lock);
__padata_remove_cpu(pinst, cpu);
mutex_unlock(&pinst->lock);
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
if (!cpumask_test_cpu(cpu, pinst->cpumask))
break;
mutex_lock(&pinst->lock);
__padata_add_cpu(pinst, cpu);
mutex_unlock(&pinst->lock);
}
return NOTIFY_OK;
}
/*
* padata_alloc - allocate and initialize a padata instance
*
* @cpumask: cpumask that padata uses for parallelization
* @wq: workqueue to use for the allocated padata instance
*/
struct padata_instance *padata_alloc(const struct cpumask *cpumask,
struct workqueue_struct *wq)
{
int err;
struct padata_instance *pinst;
struct parallel_data *pd;
pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
if (!pinst)
goto err;
pd = padata_alloc_pd(pinst, cpumask);
if (!pd)
goto err_free_inst;
rcu_assign_pointer(pinst->pd, pd);
pinst->wq = wq;
cpumask_copy(pinst->cpumask, cpumask);
pinst->flags = 0;
pinst->cpu_notifier.notifier_call = padata_cpu_callback;
pinst->cpu_notifier.priority = 0;
err = register_hotcpu_notifier(&pinst->cpu_notifier);
if (err)
goto err_free_pd;
mutex_init(&pinst->lock);
return pinst;
err_free_pd:
padata_free_pd(pd);
err_free_inst:
kfree(pinst);
err:
return NULL;
}
EXPORT_SYMBOL(padata_alloc);
/*
* padata_free - free a padata instance
*
* @ padata_inst: padata instance to free
*/
void padata_free(struct padata_instance *pinst)
{
padata_stop(pinst);
synchronize_rcu();
while (atomic_read(&pinst->pd->refcnt) != 0)
yield();
unregister_hotcpu_notifier(&pinst->cpu_notifier);
padata_free_pd(pinst->pd);
kfree(pinst);
}
EXPORT_SYMBOL(padata_free);

File diff suppressed because it is too large Load diff

View file

@ -367,7 +367,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
struct task_struct *result = NULL;
if (pid) {
struct hlist_node *first;
first = rcu_dereference(pid->tasks[type].first);
first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock));
if (first)
result = hlist_entry(first, struct task_struct, pids[(type)].node);
}

View file

@ -256,7 +256,7 @@ static int posix_get_monotonic_coarse(clockid_t which_clock,
return 0;
}
int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp)
static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp)
{
*tp = ktime_to_timespec(KTIME_LOW_RES);
return 0;

View file

@ -27,6 +27,15 @@ config PM_DEBUG
code. This is helpful when debugging and reporting PM bugs, like
suspend support.
config PM_ADVANCED_DEBUG
bool "Extra PM attributes in sysfs for low-level debugging/testing"
depends on PM_DEBUG
default n
---help---
Add extra sysfs attributes allowing one to access some Power Management
fields of device objects from user space. If you are not a kernel
developer interested in debugging/testing Power Management, say "no".
config PM_VERBOSE
bool "Verbose Power Management debugging"
depends on PM_DEBUG
@ -85,6 +94,11 @@ config PM_SLEEP
depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
default y
config PM_SLEEP_ADVANCED_DEBUG
bool
depends on PM_ADVANCED_DEBUG
default n
config SUSPEND
bool "Suspend to RAM and standby"
depends on PM && ARCH_SUSPEND_POSSIBLE
@ -222,3 +236,8 @@ config PM_RUNTIME
and the bus type drivers of the buses the devices are on are
responsible for the actual handling of the autosuspend requests and
wake-up events.
config PM_OPS
bool
depends on PM_SLEEP || PM_RUNTIME
default y

View file

@ -44,6 +44,32 @@ int pm_notifier_call_chain(unsigned long val)
== NOTIFY_BAD) ? -EINVAL : 0;
}
/* If set, devices may be suspended and resumed asynchronously. */
int pm_async_enabled = 1;
static ssize_t pm_async_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
return sprintf(buf, "%d\n", pm_async_enabled);
}
static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
unsigned long val;
if (strict_strtoul(buf, 10, &val))
return -EINVAL;
if (val > 1)
return -EINVAL;
pm_async_enabled = val;
return n;
}
power_attr(pm_async);
#ifdef CONFIG_PM_DEBUG
int pm_test_level = TEST_NONE;
@ -208,8 +234,11 @@ static struct attribute * g[] = {
#ifdef CONFIG_PM_TRACE
&pm_trace_attr.attr,
#endif
#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_DEBUG)
#ifdef CONFIG_PM_SLEEP
&pm_async_attr.attr,
#ifdef CONFIG_PM_DEBUG
&pm_test_attr.attr,
#endif
#endif
NULL,
};

View file

@ -1181,7 +1181,7 @@ static void free_unnecessary_pages(void)
memory_bm_position_reset(&copy_bm);
while (to_free_normal > 0 && to_free_highmem > 0) {
while (to_free_normal > 0 || to_free_highmem > 0) {
unsigned long pfn = memory_bm_next_pfn(&copy_bm);
struct page *page = pfn_to_page(pfn);
@ -1500,7 +1500,7 @@ asmlinkage int swsusp_save(void)
{
unsigned int nr_pages, nr_highmem;
printk(KERN_INFO "PM: Creating hibernation image: \n");
printk(KERN_INFO "PM: Creating hibernation image:\n");
drain_local_pages(NULL);
nr_pages = count_data_pages();

View file

@ -657,10 +657,6 @@ int swsusp_read(unsigned int *flags_p)
struct swsusp_info *header;
*flags_p = swsusp_header->flags;
if (IS_ERR(resume_bdev)) {
pr_debug("PM: Image device not initialised\n");
return PTR_ERR(resume_bdev);
}
memset(&snapshot, 0, sizeof(struct snapshot_handle));
error = snapshot_write_next(&snapshot, PAGE_SIZE);

View file

@ -1,58 +0,0 @@
/*
* linux/kernel/power/swsusp.c
*
* This file provides code to write suspend image to swap and read it back.
*
* Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
* Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
*
* This file is released under the GPLv2.
*
* I'd like to thank the following people for their work:
*
* Pavel Machek <pavel@ucw.cz>:
* Modifications, defectiveness pointing, being with me at the very beginning,
* suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
*
* Steve Doddi <dirk@loth.demon.co.uk>:
* Support the possibility of hardware state restoring.
*
* Raph <grey.havens@earthling.net>:
* Support for preserving states of network devices and virtual console
* (including X and svgatextmode)
*
* Kurt Garloff <garloff@suse.de>:
* Straightened the critical function in order to prevent compilers from
* playing tricks with local variables.
*
* Andreas Mohr <a.mohr@mailto.de>
*
* Alex Badea <vampire@go.ro>:
* Fixed runaway init
*
* Rafael J. Wysocki <rjw@sisk.pl>
* Reworked the freeing of memory and the handling of swap
*
* More state savers are welcome. Especially for the scsi layer...
*
* For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
*/
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/swap.h>
#include <linux/pm.h>
#include <linux/swapops.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/rbtree.h>
#include <linux/io.h>
#include "power.h"
int in_suspend __nosavedata = 0;

View file

@ -195,6 +195,15 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
return res;
}
static void snapshot_deprecated_ioctl(unsigned int cmd)
{
if (printk_ratelimit())
printk(KERN_NOTICE "%pf: ioctl '%.8x' is deprecated and will "
"be removed soon, update your suspend-to-disk "
"utilities\n",
__builtin_return_address(0), cmd);
}
static long snapshot_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
@ -246,8 +255,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
data->frozen = 0;
break;
case SNAPSHOT_CREATE_IMAGE:
case SNAPSHOT_ATOMIC_SNAPSHOT:
snapshot_deprecated_ioctl(cmd);
case SNAPSHOT_CREATE_IMAGE:
if (data->mode != O_RDONLY || !data->frozen || data->ready) {
error = -EPERM;
break;
@ -275,8 +285,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
data->ready = 0;
break;
case SNAPSHOT_PREF_IMAGE_SIZE:
case SNAPSHOT_SET_IMAGE_SIZE:
snapshot_deprecated_ioctl(cmd);
case SNAPSHOT_PREF_IMAGE_SIZE:
image_size = arg;
break;
@ -290,15 +301,17 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
error = put_user(size, (loff_t __user *)arg);
break;
case SNAPSHOT_AVAIL_SWAP_SIZE:
case SNAPSHOT_AVAIL_SWAP:
snapshot_deprecated_ioctl(cmd);
case SNAPSHOT_AVAIL_SWAP_SIZE:
size = count_swap_pages(data->swap, 1);
size <<= PAGE_SHIFT;
error = put_user(size, (loff_t __user *)arg);
break;
case SNAPSHOT_ALLOC_SWAP_PAGE:
case SNAPSHOT_GET_SWAP_PAGE:
snapshot_deprecated_ioctl(cmd);
case SNAPSHOT_ALLOC_SWAP_PAGE:
if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
error = -ENODEV;
break;
@ -321,6 +334,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
break;
case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */
snapshot_deprecated_ioctl(cmd);
if (!swsusp_swap_in_use()) {
/*
* User space encodes device types as two-byte values,
@ -362,6 +376,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
break;
case SNAPSHOT_PMOPS: /* This ioctl is deprecated */
snapshot_deprecated_ioctl(cmd);
error = -EINVAL;
switch (arg) {

View file

@ -22,6 +22,7 @@
#include <linux/pid_namespace.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/regset.h>
/*
@ -511,6 +512,47 @@ static int ptrace_resume(struct task_struct *child, long request, long data)
return 0;
}
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
static const struct user_regset *
find_regset(const struct user_regset_view *view, unsigned int type)
{
const struct user_regset *regset;
int n;
for (n = 0; n < view->n; ++n) {
regset = view->regsets + n;
if (regset->core_note_type == type)
return regset;
}
return NULL;
}
static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
struct iovec *kiov)
{
const struct user_regset_view *view = task_user_regset_view(task);
const struct user_regset *regset = find_regset(view, type);
int regset_no;
if (!regset || (kiov->iov_len % regset->size) != 0)
return -EINVAL;
regset_no = regset - view->regsets;
kiov->iov_len = min(kiov->iov_len,
(__kernel_size_t) (regset->n * regset->size));
if (req == PTRACE_GETREGSET)
return copy_regset_to_user(task, view, regset_no, 0,
kiov->iov_len, kiov->iov_base);
else
return copy_regset_from_user(task, view, regset_no, 0,
kiov->iov_len, kiov->iov_base);
}
#endif
int ptrace_request(struct task_struct *child, long request,
long addr, long data)
{
@ -573,6 +615,26 @@ int ptrace_request(struct task_struct *child, long request,
return 0;
return ptrace_resume(child, request, SIGKILL);
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
case PTRACE_GETREGSET:
case PTRACE_SETREGSET:
{
struct iovec kiov;
struct iovec __user *uiov = (struct iovec __user *) data;
if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov)))
return -EFAULT;
if (__get_user(kiov.iov_base, &uiov->iov_base) ||
__get_user(kiov.iov_len, &uiov->iov_len))
return -EFAULT;
ret = ptrace_regset(child, request, addr, &kiov);
if (!ret)
ret = __put_user(kiov.iov_len, &uiov->iov_len);
break;
}
#endif
default:
break;
}
@ -711,6 +773,32 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
else
ret = ptrace_setsiginfo(child, &siginfo);
break;
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
case PTRACE_GETREGSET:
case PTRACE_SETREGSET:
{
struct iovec kiov;
struct compat_iovec __user *uiov =
(struct compat_iovec __user *) datap;
compat_uptr_t ptr;
compat_size_t len;
if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov)))
return -EFAULT;
if (__get_user(ptr, &uiov->iov_base) ||
__get_user(len, &uiov->iov_len))
return -EFAULT;
kiov.iov_base = compat_ptr(ptr);
kiov.iov_len = len;
ret = ptrace_regset(child, request, addr, &kiov);
if (!ret)
ret = __put_user(kiov.iov_len, &uiov->iov_len);
break;
}
#endif
default:
ret = ptrace_request(child, request, addr, data);

View file

@ -44,14 +44,43 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kernel_stat.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);
static struct lock_class_key rcu_bh_lock_key;
struct lockdep_map rcu_bh_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
static struct lock_class_key rcu_sched_lock_key;
struct lockdep_map rcu_sched_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
#endif
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
/*
* This function is invoked towards the end of the scheduler's initialization
* process. Before this is called, the idle task might contain
* RCU read-side critical sections (during which time, this idle
* task is booting the system). After this function is called, the
* idle tasks are prohibited from containing RCU read-side critical
* sections.
*/
void rcu_scheduler_starting(void)
{
WARN_ON(num_online_cpus() != 1);
WARN_ON(nr_context_switches() > 0);
rcu_scheduler_active = 1;
}
/*
* Awaken the corresponding synchronize_rcu() instance now that a
* grace period has elapsed.

View file

@ -61,6 +61,9 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
static int stutter = 5; /* Start/stop testing interval (in sec) */
static int irqreader = 1; /* RCU readers from irq (timers). */
static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */
static int fqs_holdoff = 0; /* Hold time within burst (us). */
static int fqs_stutter = 3; /* Wait time between bursts (s). */
static char *torture_type = "rcu"; /* What RCU implementation to torture. */
module_param(nreaders, int, 0444);
@ -79,6 +82,12 @@ module_param(stutter, int, 0444);
MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
module_param(irqreader, int, 0444);
MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
module_param(fqs_duration, int, 0444);
MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us)");
module_param(fqs_holdoff, int, 0444);
MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
module_param(fqs_stutter, int, 0444);
MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
@ -99,6 +108,7 @@ static struct task_struct **reader_tasks;
static struct task_struct *stats_task;
static struct task_struct *shuffler_task;
static struct task_struct *stutter_task;
static struct task_struct *fqs_task;
#define RCU_TORTURE_PIPE_LEN 10
@ -263,6 +273,7 @@ struct rcu_torture_ops {
void (*deferred_free)(struct rcu_torture *p);
void (*sync)(void);
void (*cb_barrier)(void);
void (*fqs)(void);
int (*stats)(char *page);
int irq_capable;
char *name;
@ -347,6 +358,7 @@ static struct rcu_torture_ops rcu_ops = {
.deferred_free = rcu_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = rcu_barrier,
.fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu"
@ -388,6 +400,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = NULL,
.fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_sync"
@ -403,6 +416,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu_expedited,
.cb_barrier = NULL,
.fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_expedited"
@ -465,6 +479,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
.deferred_free = rcu_bh_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = rcu_barrier_bh,
.fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_bh"
@ -480,6 +495,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = NULL,
.fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_bh_sync"
@ -621,6 +637,7 @@ static struct rcu_torture_ops sched_ops = {
.deferred_free = rcu_sched_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = rcu_barrier_sched,
.fqs = rcu_sched_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "sched"
@ -636,6 +653,7 @@ static struct rcu_torture_ops sched_sync_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = NULL,
.fqs = rcu_sched_force_quiescent_state,
.stats = NULL,
.name = "sched_sync"
};
@ -650,11 +668,44 @@ static struct rcu_torture_ops sched_expedited_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_sched_expedited,
.cb_barrier = NULL,
.fqs = rcu_sched_force_quiescent_state,
.stats = rcu_expedited_torture_stats,
.irq_capable = 1,
.name = "sched_expedited"
};
/*
* RCU torture force-quiescent-state kthread. Repeatedly induces
* bursts of calls to force_quiescent_state(), increasing the probability
* of occurrence of some important types of race conditions.
*/
static int
rcu_torture_fqs(void *arg)
{
unsigned long fqs_resume_time;
int fqs_burst_remaining;
VERBOSE_PRINTK_STRING("rcu_torture_fqs task started");
do {
fqs_resume_time = jiffies + fqs_stutter * HZ;
while (jiffies - fqs_resume_time > LONG_MAX) {
schedule_timeout_interruptible(1);
}
fqs_burst_remaining = fqs_duration;
while (fqs_burst_remaining > 0) {
cur_ops->fqs();
udelay(fqs_holdoff);
fqs_burst_remaining -= fqs_holdoff;
}
rcu_stutter_wait("rcu_torture_fqs");
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping");
rcutorture_shutdown_absorb("rcu_torture_fqs");
while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
return 0;
}
/*
* RCU torture writer kthread. Repeatedly substitutes a new structure
* for that pointed to by rcu_torture_current, freeing the old structure
@ -745,7 +796,11 @@ static void rcu_torture_timer(unsigned long unused)
idx = cur_ops->readlock();
completed = cur_ops->completed();
p = rcu_dereference(rcu_torture_current);
p = rcu_dereference_check(rcu_torture_current,
rcu_read_lock_held() ||
rcu_read_lock_bh_held() ||
rcu_read_lock_sched_held() ||
srcu_read_lock_held(&srcu_ctl));
if (p == NULL) {
/* Leave because rcu_torture_writer is not yet underway */
cur_ops->readunlock(idx);
@ -798,11 +853,15 @@ rcu_torture_reader(void *arg)
do {
if (irqreader && cur_ops->irq_capable) {
if (!timer_pending(&t))
mod_timer(&t, 1);
mod_timer(&t, jiffies + 1);
}
idx = cur_ops->readlock();
completed = cur_ops->completed();
p = rcu_dereference(rcu_torture_current);
p = rcu_dereference_check(rcu_torture_current,
rcu_read_lock_held() ||
rcu_read_lock_bh_held() ||
rcu_read_lock_sched_held() ||
srcu_read_lock_held(&srcu_ctl));
if (p == NULL) {
/* Wait for rcu_torture_writer to get underway */
cur_ops->readunlock(idx);
@ -1030,10 +1089,11 @@ rcu_torture_print_module_parms(char *tag)
printk(KERN_ALERT "%s" TORTURE_FLAG
"--- %s: nreaders=%d nfakewriters=%d "
"stat_interval=%d verbose=%d test_no_idle_hz=%d "
"shuffle_interval=%d stutter=%d irqreader=%d\n",
"shuffle_interval=%d stutter=%d irqreader=%d "
"fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
stutter, irqreader);
stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter);
}
static struct notifier_block rcutorture_nb = {
@ -1109,6 +1169,12 @@ rcu_torture_cleanup(void)
}
stats_task = NULL;
if (fqs_task) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task");
kthread_stop(fqs_task);
}
fqs_task = NULL;
/* Wait for all RCU callbacks to fire. */
if (cur_ops->cb_barrier != NULL)
@ -1154,6 +1220,11 @@ rcu_torture_init(void)
mutex_unlock(&fullstop_mutex);
return -EINVAL;
}
if (cur_ops->fqs == NULL && fqs_duration != 0) {
printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero "
"fqs_duration, fqs disabled.\n");
fqs_duration = 0;
}
if (cur_ops->init)
cur_ops->init(); /* no "goto unwind" prior to this point!!! */
@ -1282,6 +1353,19 @@ rcu_torture_init(void)
goto unwind;
}
}
if (fqs_duration < 0)
fqs_duration = 0;
if (fqs_duration) {
/* Create the stutter thread */
fqs_task = kthread_run(rcu_torture_fqs, NULL,
"rcu_torture_fqs");
if (IS_ERR(fqs_task)) {
firsterr = PTR_ERR(fqs_task);
VERBOSE_PRINTK_ERRSTRING("Failed to create fqs");
fqs_task = NULL;
goto unwind;
}
}
register_reboot_notifier(&rcutorture_nb);
mutex_unlock(&fullstop_mutex);
return 0;

View file

@ -46,7 +46,6 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
#include "rcutree.h"
@ -66,11 +65,11 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
.signaled = RCU_GP_IDLE, \
.gpnum = -300, \
.completed = -300, \
.onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \
.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&name.onofflock), \
.orphan_cbs_list = NULL, \
.orphan_cbs_tail = &name.orphan_cbs_list, \
.orphan_qlen = 0, \
.fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \
.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&name.fqslock), \
.n_force_qs = 0, \
.n_force_qs_ngp = 0, \
}
@ -81,9 +80,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
static int rcu_scheduler_active __read_mostly;
/*
* Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
* permit this function to be invoked without holding the root rcu_node
@ -156,6 +152,24 @@ long rcu_batches_completed_bh(void)
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
/*
* Force a quiescent state for RCU BH.
*/
void rcu_bh_force_quiescent_state(void)
{
force_quiescent_state(&rcu_bh_state, 0);
}
EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
/*
* Force a quiescent state for RCU-sched.
*/
void rcu_sched_force_quiescent_state(void)
{
force_quiescent_state(&rcu_sched_state, 0);
}
EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
/*
* Does the CPU have callbacks ready to be invoked?
*/
@ -439,10 +453,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
/* Only let one CPU complain about others per time interval. */
spin_lock_irqsave(&rnp->lock, flags);
raw_spin_lock_irqsave(&rnp->lock, flags);
delta = jiffies - rsp->jiffies_stall;
if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
@ -452,13 +466,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
* due to CPU offlining.
*/
rcu_print_task_stall(rnp);
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
/* OK, time to rat on our buddy... */
printk(KERN_ERR "INFO: RCU detected CPU stalls:");
rcu_for_each_leaf_node(rsp, rnp) {
raw_spin_lock_irqsave(&rnp->lock, flags);
rcu_print_task_stall(rnp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (rnp->qsmask == 0)
continue;
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
@ -469,6 +485,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
smp_processor_id(), (long)(jiffies - rsp->gp_start));
trigger_all_cpu_backtrace();
/* If so configured, complain about tasks blocking the grace period. */
rcu_print_detail_task_stall(rsp);
force_quiescent_state(rsp, 0); /* Kick them all. */
}
@ -481,11 +501,11 @@ static void print_cpu_stall(struct rcu_state *rsp)
smp_processor_id(), jiffies - rsp->gp_start);
trigger_all_cpu_backtrace();
spin_lock_irqsave(&rnp->lock, flags);
if ((long)(jiffies - rsp->jiffies_stall) >= 0)
raw_spin_lock_irqsave(&rnp->lock, flags);
if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
rsp->jiffies_stall =
jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
set_need_resched(); /* kick ourselves to get things going. */
}
@ -545,12 +565,12 @@ static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
local_irq_save(flags);
rnp = rdp->mynode;
if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */
!spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */
!raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
local_irq_restore(flags);
return;
}
__note_new_gpnum(rsp, rnp, rdp);
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@ -609,12 +629,12 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
local_irq_save(flags);
rnp = rdp->mynode;
if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */
!spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */
!raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
local_irq_restore(flags);
return;
}
__rcu_process_gp_end(rsp, rnp, rdp);
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@ -659,12 +679,14 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
struct rcu_data *rdp = rsp->rda[smp_processor_id()];
struct rcu_node *rnp = rcu_get_root(rsp);
if (!cpu_needs_another_gp(rsp, rdp)) {
if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) {
if (cpu_needs_another_gp(rsp, rdp))
rsp->fqs_need_gp = 1;
if (rnp->completed == rsp->completed) {
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
/*
* Propagate new ->completed value to rcu_node structures
@ -672,9 +694,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
* of the next grace period to process their callbacks.
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
spin_lock(&rnp->lock); /* irqs already disabled. */
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->completed = rsp->completed;
spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
local_irq_restore(flags);
return;
@ -695,15 +717,15 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
rnp->completed = rsp->completed;
rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
rcu_start_gp_per_cpu(rsp, rnp, rdp);
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
spin_unlock(&rnp->lock); /* leave irqs disabled. */
raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */
/* Exclude any concurrent CPU-hotplug operations. */
spin_lock(&rsp->onofflock); /* irqs already disabled. */
raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
/*
* Set the quiescent-state-needed bits in all the rcu_node
@ -723,21 +745,21 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
* irqs disabled.
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
spin_lock(&rnp->lock); /* irqs already disabled. */
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
rnp->gpnum = rsp->gpnum;
rnp->completed = rsp->completed;
if (rnp == rdp->mynode)
rcu_start_gp_per_cpu(rsp, rnp, rdp);
spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
rnp = rcu_get_root(rsp);
spin_lock(&rnp->lock); /* irqs already disabled. */
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
spin_unlock(&rnp->lock); /* irqs remain disabled. */
spin_unlock_irqrestore(&rsp->onofflock, flags);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
/*
@ -776,14 +798,14 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
if (!(rnp->qsmask & mask)) {
/* Our bit has already been cleared, so done. */
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
rnp->qsmask &= ~mask;
if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
/* Other bits still set at this level, so done. */
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
mask = rnp->grpmask;
@ -793,10 +815,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
break;
}
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
rnp_c = rnp;
rnp = rnp->parent;
spin_lock_irqsave(&rnp->lock, flags);
raw_spin_lock_irqsave(&rnp->lock, flags);
WARN_ON_ONCE(rnp_c->qsmask);
}
@ -825,7 +847,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las
struct rcu_node *rnp;
rnp = rdp->mynode;
spin_lock_irqsave(&rnp->lock, flags);
raw_spin_lock_irqsave(&rnp->lock, flags);
if (lastcomp != rnp->completed) {
/*
@ -837,12 +859,12 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las
* race occurred.
*/
rdp->passed_quiesc = 0; /* try again later! */
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
mask = rdp->grpmask;
if ((rnp->qsmask & mask) == 0) {
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
} else {
rdp->qs_pending = 0;
@ -906,7 +928,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
if (rdp->nxtlist == NULL)
return; /* irqs disabled, so comparison is stable. */
spin_lock(&rsp->onofflock); /* irqs already disabled. */
raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
*rsp->orphan_cbs_tail = rdp->nxtlist;
rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
rdp->nxtlist = NULL;
@ -914,7 +936,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
rdp->nxttail[i] = &rdp->nxtlist;
rsp->orphan_qlen += rdp->qlen;
rdp->qlen = 0;
spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
}
/*
@ -925,10 +947,10 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
unsigned long flags;
struct rcu_data *rdp;
spin_lock_irqsave(&rsp->onofflock, flags);
raw_spin_lock_irqsave(&rsp->onofflock, flags);
rdp = rsp->rda[smp_processor_id()];
if (rsp->orphan_cbs_list == NULL) {
spin_unlock_irqrestore(&rsp->onofflock, flags);
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
return;
}
*rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
@ -937,7 +959,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
rsp->orphan_cbs_list = NULL;
rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
rsp->orphan_qlen = 0;
spin_unlock_irqrestore(&rsp->onofflock, flags);
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
/*
@ -953,23 +975,23 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp;
/* Exclude any attempts to start a new grace period. */
spin_lock_irqsave(&rsp->onofflock, flags);
raw_spin_lock_irqsave(&rsp->onofflock, flags);
/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */
mask = rdp->grpmask; /* rnp->grplo is constant. */
do {
spin_lock(&rnp->lock); /* irqs already disabled. */
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->qsmaskinit &= ~mask;
if (rnp->qsmaskinit != 0) {
if (rnp != rdp->mynode)
spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
break;
}
if (rnp == rdp->mynode)
need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
else
spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
mask = rnp->grpmask;
rnp = rnp->parent;
} while (rnp != NULL);
@ -980,12 +1002,12 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
* because invoking rcu_report_unblock_qs_rnp() with ->onofflock
* held leads to deadlock.
*/
spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
rnp = rdp->mynode;
if (need_report & RCU_OFL_TASKS_NORM_GP)
rcu_report_unblock_qs_rnp(rnp, flags);
else
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (need_report & RCU_OFL_TASKS_EXP_GP)
rcu_report_exp_rnp(rsp, rnp);
@ -1144,11 +1166,9 @@ void rcu_check_callbacks(int cpu, int user)
/*
* Scan the leaf rcu_node structures, processing dyntick state for any that
* have not yet encountered a quiescent state, using the function specified.
* Returns 1 if the current grace period ends while scanning (possibly
* because we made it end).
* The caller must have suppressed start of new grace periods.
*/
static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
int (*f)(struct rcu_data *))
static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
{
unsigned long bit;
int cpu;
@ -1158,13 +1178,13 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
rcu_for_each_leaf_node(rsp, rnp) {
mask = 0;
spin_lock_irqsave(&rnp->lock, flags);
if (rnp->completed != lastcomp) {
spin_unlock_irqrestore(&rnp->lock, flags);
return 1;
raw_spin_lock_irqsave(&rnp->lock, flags);
if (!rcu_gp_in_progress(rsp)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
if (rnp->qsmask == 0) {
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
continue;
}
cpu = rnp->grplo;
@ -1173,15 +1193,14 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu]))
mask |= bit;
}
if (mask != 0 && rnp->completed == lastcomp) {
if (mask != 0) {
/* rcu_report_qs_rnp() releases rnp->lock. */
rcu_report_qs_rnp(mask, rsp, rnp, flags);
continue;
}
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
return 0;
}
/*
@ -1191,32 +1210,26 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
{
unsigned long flags;
long lastcomp;
struct rcu_node *rnp = rcu_get_root(rsp);
u8 signaled;
u8 forcenow;
if (!rcu_gp_in_progress(rsp))
return; /* No grace period in progress, nothing to force. */
if (!spin_trylock_irqsave(&rsp->fqslock, flags)) {
if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) {
rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */
return; /* Someone else is already on the job. */
}
if (relaxed &&
(long)(rsp->jiffies_force_qs - jiffies) >= 0)
goto unlock_ret; /* no emergency and done recently. */
if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies))
goto unlock_fqs_ret; /* no emergency and done recently. */
rsp->n_force_qs++;
spin_lock(&rnp->lock);
lastcomp = rsp->gpnum - 1;
signaled = rsp->signaled;
raw_spin_lock(&rnp->lock); /* irqs already disabled */
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
if(!rcu_gp_in_progress(rsp)) {
rsp->n_force_qs_ngp++;
spin_unlock(&rnp->lock);
goto unlock_ret; /* no GP in progress, time updated. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
goto unlock_fqs_ret; /* no GP in progress, time updated. */
}
spin_unlock(&rnp->lock);
switch (signaled) {
rsp->fqs_active = 1;
switch (rsp->signaled) {
case RCU_GP_IDLE:
case RCU_GP_INIT:
@ -1224,45 +1237,38 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
case RCU_SAVE_DYNTICK:
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
break; /* So gcc recognizes the dead code. */
/* Record dyntick-idle state. */
if (rcu_process_dyntick(rsp, lastcomp,
dyntick_save_progress_counter))
goto unlock_ret;
/* fall into next case. */
case RCU_SAVE_COMPLETED:
/* Update state, record completion counter. */
forcenow = 0;
spin_lock(&rnp->lock);
if (lastcomp + 1 == rsp->gpnum &&
lastcomp == rsp->completed &&
rsp->signaled == signaled) {
force_qs_rnp(rsp, dyntick_save_progress_counter);
raw_spin_lock(&rnp->lock); /* irqs already disabled */
if (rcu_gp_in_progress(rsp))
rsp->signaled = RCU_FORCE_QS;
rsp->completed_fqs = lastcomp;
forcenow = signaled == RCU_SAVE_COMPLETED;
}
spin_unlock(&rnp->lock);
if (!forcenow)
break;
/* fall into next case. */
break;
case RCU_FORCE_QS:
/* Check dyntick-idle state, send IPI to laggarts. */
if (rcu_process_dyntick(rsp, rsp->completed_fqs,
rcu_implicit_dynticks_qs))
goto unlock_ret;
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
/* Leave state in case more forcing is required. */
raw_spin_lock(&rnp->lock); /* irqs already disabled */
break;
}
unlock_ret:
spin_unlock_irqrestore(&rsp->fqslock, flags);
rsp->fqs_active = 0;
if (rsp->fqs_need_gp) {
raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */
rsp->fqs_need_gp = 0;
rcu_start_gp(rsp, flags); /* releases rnp->lock */
return;
}
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
unlock_fqs_ret:
raw_spin_unlock_irqrestore(&rsp->fqslock, flags);
}
#else /* #ifdef CONFIG_SMP */
@ -1290,7 +1296,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
* If an RCU GP has gone long enough, go check for dyntick
* idle CPUs and, if needed, send resched IPIs.
*/
if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
force_quiescent_state(rsp, 1);
/*
@ -1304,7 +1310,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
/* Does this CPU require a not-yet-started grace period? */
if (cpu_needs_another_gp(rsp, rdp)) {
spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
rcu_start_gp(rsp, flags); /* releases above lock */
}
@ -1335,6 +1341,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
* grace-period manipulations above.
*/
smp_mb(); /* See above block comment. */
/* If we are last CPU on way to dyntick-idle mode, accelerate it. */
rcu_needs_cpu_flush();
}
static void
@ -1369,7 +1378,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
unsigned long nestflag;
struct rcu_node *rnp_root = rcu_get_root(rsp);
spin_lock_irqsave(&rnp_root->lock, nestflag);
raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */
}
@ -1387,7 +1396,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
force_quiescent_state(rsp, 0);
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->qlen_last_fqs_check = rdp->qlen;
} else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
force_quiescent_state(rsp, 1);
local_irq_restore(flags);
}
@ -1520,7 +1529,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
/* Has an RCU GP gone long enough to send resched IPIs &c? */
if (rcu_gp_in_progress(rsp) &&
((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) {
ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) {
rdp->n_rp_need_fqs++;
return 1;
}
@ -1545,10 +1554,9 @@ static int rcu_pending(int cpu)
/*
* Check to see if any future RCU-related work will need to be done
* by the current CPU, even if none need be done immediately, returning
* 1 if so. This function is part of the RCU implementation; it is -not-
* an exported member of the RCU API.
* 1 if so.
*/
int rcu_needs_cpu(int cpu)
static int rcu_needs_cpu_quick_check(int cpu)
{
/* RCU callbacks either ready or pending? */
return per_cpu(rcu_sched_data, cpu).nxtlist ||
@ -1556,21 +1564,6 @@ int rcu_needs_cpu(int cpu)
rcu_preempt_needs_cpu(cpu);
}
/*
* This function is invoked towards the end of the scheduler's initialization
* process. Before this is called, the idle task might contain
* RCU read-side critical sections (during which time, this idle
* task is booting the system). After this function is called, the
* idle tasks are prohibited from containing RCU read-side critical
* sections.
*/
void rcu_scheduler_starting(void)
{
WARN_ON(num_online_cpus() != 1);
WARN_ON(nr_context_switches() > 0);
rcu_scheduler_active = 1;
}
static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
static atomic_t rcu_barrier_cpu_count;
static DEFINE_MUTEX(rcu_barrier_mutex);
@ -1659,7 +1652,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
spin_lock_irqsave(&rnp->lock, flags);
raw_spin_lock_irqsave(&rnp->lock, flags);
rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
@ -1669,7 +1662,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
#endif /* #ifdef CONFIG_NO_HZ */
rdp->cpu = cpu;
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@ -1687,7 +1680,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
spin_lock_irqsave(&rnp->lock, flags);
raw_spin_lock_irqsave(&rnp->lock, flags);
rdp->passed_quiesc = 0; /* We could be racing with new GP, */
rdp->qs_pending = 1; /* so set up to respond to current GP. */
rdp->beenonline = 1; /* We have now been online. */
@ -1695,7 +1688,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
/*
* A new grace period might start here. If so, we won't be part
@ -1703,14 +1696,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
*/
/* Exclude any attempts to start a new GP on large systems. */
spin_lock(&rsp->onofflock); /* irqs already disabled. */
raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
/* Add CPU to rcu_node bitmasks. */
rnp = rdp->mynode;
mask = rdp->grpmask;
do {
/* Exclude any attempts to start a new GP on small systems. */
spin_lock(&rnp->lock); /* irqs already disabled. */
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->qsmaskinit |= mask;
mask = rnp->grpmask;
if (rnp == rdp->mynode) {
@ -1718,11 +1711,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
rdp->completed = rnp->completed;
rdp->passed_quiesc_completed = rnp->completed - 1;
}
spin_unlock(&rnp->lock); /* irqs already disabled. */
raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
rnp = rnp->parent;
} while (rnp != NULL && !(rnp->qsmaskinit & mask));
spin_unlock_irqrestore(&rsp->onofflock, flags);
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
static void __cpuinit rcu_online_cpu(int cpu)
@ -1806,11 +1799,17 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
*/
static void __init rcu_init_one(struct rcu_state *rsp)
{
static char *buf[] = { "rcu_node_level_0",
"rcu_node_level_1",
"rcu_node_level_2",
"rcu_node_level_3" }; /* Match MAX_RCU_LVLS */
int cpustride = 1;
int i;
int j;
struct rcu_node *rnp;
BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
/* Initialize the level-tracking arrays. */
for (i = 1; i < NUM_RCU_LVLS; i++)
@ -1823,8 +1822,9 @@ static void __init rcu_init_one(struct rcu_state *rsp)
cpustride *= rsp->levelspread[i];
rnp = rsp->level[i];
for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
spin_lock_init(&rnp->lock);
lockdep_set_class(&rnp->lock, &rcu_node_class[i]);
raw_spin_lock_init(&rnp->lock);
lockdep_set_class_and_name(&rnp->lock,
&rcu_node_class[i], buf[i]);
rnp->gpnum = 0;
rnp->qsmask = 0;
rnp->qsmaskinit = 0;
@ -1876,7 +1876,7 @@ do { \
void __init rcu_init(void)
{
int i;
int cpu;
rcu_bootup_announce();
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
@ -1896,8 +1896,8 @@ void __init rcu_init(void)
* or the scheduler are operational.
*/
cpu_notifier(rcu_cpu_notify, 0);
for_each_online_cpu(i)
rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i);
for_each_online_cpu(cpu)
rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
}
#include "rcutree_plugin.h"

View file

@ -90,12 +90,12 @@ struct rcu_dynticks {
* Definition for node within the RCU grace-period-detection hierarchy.
*/
struct rcu_node {
spinlock_t lock; /* Root rcu_node's lock protects some */
raw_spinlock_t lock; /* Root rcu_node's lock protects some */
/* rcu_state fields as well as following. */
long gpnum; /* Current grace period for this node. */
unsigned long gpnum; /* Current grace period for this node. */
/* This will either be equal to or one */
/* behind the root rcu_node's gpnum. */
long completed; /* Last grace period completed for this node. */
unsigned long completed; /* Last GP completed for this node. */
/* This will either be equal to or one */
/* behind the root rcu_node's gpnum. */
unsigned long qsmask; /* CPUs or groups that need to switch in */
@ -161,11 +161,11 @@ struct rcu_node {
/* Per-CPU data for read-copy update. */
struct rcu_data {
/* 1) quiescent-state and grace-period handling : */
long completed; /* Track rsp->completed gp number */
unsigned long completed; /* Track rsp->completed gp number */
/* in order to detect GP end. */
long gpnum; /* Highest gp number that this CPU */
unsigned long gpnum; /* Highest gp number that this CPU */
/* is aware of having started. */
long passed_quiesc_completed;
unsigned long passed_quiesc_completed;
/* Value of completed at time of qs. */
bool passed_quiesc; /* User-mode/idle loop etc. */
bool qs_pending; /* Core waits for quiesc state. */
@ -221,14 +221,14 @@ struct rcu_data {
unsigned long resched_ipi; /* Sent a resched IPI. */
/* 5) __rcu_pending() statistics. */
long n_rcu_pending; /* rcu_pending() calls since boot. */
long n_rp_qs_pending;
long n_rp_cb_ready;
long n_rp_cpu_needs_gp;
long n_rp_gp_completed;
long n_rp_gp_started;
long n_rp_need_fqs;
long n_rp_need_nothing;
unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
unsigned long n_rp_qs_pending;
unsigned long n_rp_cb_ready;
unsigned long n_rp_cpu_needs_gp;
unsigned long n_rp_gp_completed;
unsigned long n_rp_gp_started;
unsigned long n_rp_need_fqs;
unsigned long n_rp_need_nothing;
int cpu;
};
@ -237,12 +237,11 @@ struct rcu_data {
#define RCU_GP_IDLE 0 /* No grace period in progress. */
#define RCU_GP_INIT 1 /* Grace period being initialized. */
#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
#define RCU_SAVE_COMPLETED 3 /* Need to save rsp->completed. */
#define RCU_FORCE_QS 4 /* Need to force quiescent state. */
#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
#ifdef CONFIG_NO_HZ
#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
#else /* #ifdef CONFIG_NO_HZ */
#define RCU_SIGNAL_INIT RCU_SAVE_COMPLETED
#define RCU_SIGNAL_INIT RCU_FORCE_QS
#endif /* #else #ifdef CONFIG_NO_HZ */
#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
@ -256,6 +255,9 @@ struct rcu_data {
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
/*
* RCU global state, including node hierarchy. This hierarchy is
* represented in "heap" form in a dense array. The root (first level)
@ -277,12 +279,19 @@ struct rcu_state {
u8 signaled ____cacheline_internodealigned_in_smp;
/* Force QS state. */
long gpnum; /* Current gp number. */
long completed; /* # of last completed gp. */
u8 fqs_active; /* force_quiescent_state() */
/* is running. */
u8 fqs_need_gp; /* A CPU was prevented from */
/* starting a new grace */
/* period because */
/* force_quiescent_state() */
/* was running. */
unsigned long gpnum; /* Current gp number. */
unsigned long completed; /* # of last completed gp. */
/* End of fields guarded by root rcu_node's lock. */
spinlock_t onofflock; /* exclude on/offline and */
raw_spinlock_t onofflock; /* exclude on/offline and */
/* starting new GP. Also */
/* protects the following */
/* orphan_cbs fields. */
@ -292,10 +301,8 @@ struct rcu_state {
/* going offline. */
struct rcu_head **orphan_cbs_tail; /* And tail pointer. */
long orphan_qlen; /* Number of orphaned cbs. */
spinlock_t fqslock; /* Only one task forcing */
raw_spinlock_t fqslock; /* Only one task forcing */
/* quiescent states. */
long completed_fqs; /* Value of completed @ snap. */
/* Protected by fqslock. */
unsigned long jiffies_force_qs; /* Time at which to invoke */
/* force_quiescent_state(). */
unsigned long n_force_qs; /* Number of calls to */
@ -319,8 +326,6 @@ struct rcu_state {
#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
/* GP were moved to root. */
#ifdef RCU_TREE_NONCORE
/*
* RCU implementation internal declarations:
*/
@ -335,7 +340,7 @@ extern struct rcu_state rcu_preempt_state;
DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#else /* #ifdef RCU_TREE_NONCORE */
#ifndef RCU_TREE_NONCORE
/* Forward declarations for rcutree_plugin.h */
static void rcu_bootup_announce(void);
@ -347,6 +352,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
unsigned long flags);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
static void rcu_print_detail_task_stall(struct rcu_state *rsp);
static void rcu_print_task_stall(struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
@ -367,5 +373,6 @@ static int rcu_preempt_needs_cpu(int cpu);
static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
static void rcu_preempt_send_cbs_to_orphanage(void);
static void __init __rcu_init_preempt(void);
static void rcu_needs_cpu_flush(void);
#endif /* #else #ifdef RCU_TREE_NONCORE */
#endif /* #ifndef RCU_TREE_NONCORE */

View file

@ -61,6 +61,15 @@ long rcu_batches_completed(void)
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
* Force a quiescent state for preemptible RCU.
*/
void rcu_force_quiescent_state(void)
{
force_quiescent_state(&rcu_preempt_state, 0);
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
/*
* Record a preemptable-RCU quiescent state for the specified CPU. Note
* that this just means that the task currently running on the CPU is
@ -102,7 +111,7 @@ static void rcu_preempt_note_context_switch(int cpu)
/* Possibly blocking in an RCU read-side critical section. */
rdp = rcu_preempt_state.rda[cpu];
rnp = rdp->mynode;
spin_lock_irqsave(&rnp->lock, flags);
raw_spin_lock_irqsave(&rnp->lock, flags);
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
t->rcu_blocked_node = rnp;
@ -123,7 +132,7 @@ static void rcu_preempt_note_context_switch(int cpu)
WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@ -180,7 +189,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
struct rcu_node *rnp_p;
if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return; /* Still need more quiescent states! */
}
@ -197,8 +206,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
/* Report up the rest of the hierarchy. */
mask = rnp->grpmask;
spin_unlock(&rnp->lock); /* irqs remain disabled. */
spin_lock(&rnp_p->lock); /* irqs already disabled. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
}
@ -248,10 +257,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
*/
for (;;) {
rnp = t->rcu_blocked_node;
spin_lock(&rnp->lock); /* irqs already disabled. */
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
if (rnp == t->rcu_blocked_node)
break;
spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
empty = !rcu_preempted_readers(rnp);
empty_exp = !rcu_preempted_readers_exp(rnp);
@ -265,7 +274,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
* Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
*/
if (empty)
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
else
rcu_report_unblock_qs_rnp(rnp, flags);
@ -295,16 +304,21 @@ void __rcu_read_unlock(void)
if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
rcu_read_unlock_special(t);
#ifdef CONFIG_PROVE_LOCKING
WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
/*
* Scan the current list of tasks blocked within RCU read-side critical
* sections, printing out the tid of each.
* Dump detailed information for all tasks blocking the current RCU
* grace period on the specified rcu_node structure.
*/
static void rcu_print_task_stall(struct rcu_node *rnp)
static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
unsigned long flags;
struct list_head *lp;
@ -312,12 +326,51 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
struct task_struct *t;
if (rcu_preempted_readers(rnp)) {
spin_lock_irqsave(&rnp->lock, flags);
raw_spin_lock_irqsave(&rnp->lock, flags);
phase = rnp->gpnum & 0x1;
lp = &rnp->blocked_tasks[phase];
list_for_each_entry(t, lp, rcu_node_entry)
sched_show_task(t);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
}
/*
* Dump detailed information for all tasks blocking the current RCU
* grace period.
*/
static void rcu_print_detail_task_stall(struct rcu_state *rsp)
{
struct rcu_node *rnp = rcu_get_root(rsp);
rcu_print_detail_task_stall_rnp(rnp);
rcu_for_each_leaf_node(rsp, rnp)
rcu_print_detail_task_stall_rnp(rnp);
}
#else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
static void rcu_print_detail_task_stall(struct rcu_state *rsp)
{
}
#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
/*
* Scan the current list of tasks blocked within RCU read-side critical
* sections, printing out the tid of each.
*/
static void rcu_print_task_stall(struct rcu_node *rnp)
{
struct list_head *lp;
int phase;
struct task_struct *t;
if (rcu_preempted_readers(rnp)) {
phase = rnp->gpnum & 0x1;
lp = &rnp->blocked_tasks[phase];
list_for_each_entry(t, lp, rcu_node_entry)
printk(" P%d", t->pid);
spin_unlock_irqrestore(&rnp->lock, flags);
}
}
@ -388,11 +441,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
lp_root = &rnp_root->blocked_tasks[i];
while (!list_empty(lp)) {
tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
spin_lock(&rnp_root->lock); /* irqs already disabled */
raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
list_del(&tp->rcu_node_entry);
tp->rcu_blocked_node = rnp_root;
list_add(&tp->rcu_node_entry, lp_root);
spin_unlock(&rnp_root->lock); /* irqs remain disabled */
raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
}
}
return retval;
@ -516,7 +569,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
unsigned long flags;
unsigned long mask;
spin_lock_irqsave(&rnp->lock, flags);
raw_spin_lock_irqsave(&rnp->lock, flags);
for (;;) {
if (!sync_rcu_preempt_exp_done(rnp))
break;
@ -525,12 +578,12 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
break;
}
mask = rnp->grpmask;
spin_unlock(&rnp->lock); /* irqs remain disabled */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
rnp = rnp->parent;
spin_lock(&rnp->lock); /* irqs already disabled */
raw_spin_lock(&rnp->lock); /* irqs already disabled */
rnp->expmask &= ~mask;
}
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
@ -545,11 +598,11 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
{
int must_wait;
spin_lock(&rnp->lock); /* irqs already disabled */
raw_spin_lock(&rnp->lock); /* irqs already disabled */
list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
must_wait = rcu_preempted_readers_exp(rnp);
spin_unlock(&rnp->lock); /* irqs remain disabled */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
if (!must_wait)
rcu_report_exp_rnp(rsp, rnp);
}
@ -594,13 +647,13 @@ void synchronize_rcu_expedited(void)
/* force all RCU readers onto blocked_tasks[]. */
synchronize_sched_expedited();
spin_lock_irqsave(&rsp->onofflock, flags);
raw_spin_lock_irqsave(&rsp->onofflock, flags);
/* Initialize ->expmask for all non-leaf rcu_node structures. */
rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
spin_lock(&rnp->lock); /* irqs already disabled. */
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rnp->expmask = rnp->qsmaskinit;
spin_unlock(&rnp->lock); /* irqs remain disabled. */
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
/* Snapshot current state of ->blocked_tasks[] lists. */
@ -609,7 +662,7 @@ void synchronize_rcu_expedited(void)
if (NUM_RCU_NODES > 1)
sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
spin_unlock_irqrestore(&rsp->onofflock, flags);
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
/* Wait for snapshotted ->blocked_tasks[] lists to drain. */
rnp = rcu_get_root(rsp);
@ -712,6 +765,16 @@ long rcu_batches_completed(void)
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
* Force a quiescent state for RCU, which, because there is no preemptible
* RCU, becomes the same as rcu-sched.
*/
void rcu_force_quiescent_state(void)
{
rcu_sched_force_quiescent_state();
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
/*
* Because preemptable RCU does not exist, we never have to check for
* CPUs being in quiescent states.
@ -734,13 +797,21 @@ static int rcu_preempted_readers(struct rcu_node *rnp)
/* Because preemptible RCU does not exist, no quieting of tasks. */
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
{
spin_unlock_irqrestore(&rnp->lock, flags);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
/*
* Because preemptable RCU does not exist, we never have to check for
* tasks blocked within RCU read-side critical sections.
*/
static void rcu_print_detail_task_stall(struct rcu_state *rsp)
{
}
/*
* Because preemptable RCU does not exist, we never have to check for
* tasks blocked within RCU read-side critical sections.
@ -884,3 +955,113 @@ static void __init __rcu_init_preempt(void)
}
#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
#if !defined(CONFIG_RCU_FAST_NO_HZ)
/*
* Check to see if any future RCU-related work will need to be done
* by the current CPU, even if none need be done immediately, returning
* 1 if so. This function is part of the RCU implementation; it is -not-
* an exported member of the RCU API.
*
* Because we have preemptible RCU, just check whether this CPU needs
* any flavor of RCU. Do not chew up lots of CPU cycles with preemption
* disabled in a most-likely vain attempt to cause RCU not to need this CPU.
*/
int rcu_needs_cpu(int cpu)
{
return rcu_needs_cpu_quick_check(cpu);
}
/*
* Check to see if we need to continue a callback-flush operations to
* allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle
* entry is not configured, so we never do need to.
*/
static void rcu_needs_cpu_flush(void)
{
}
#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
#define RCU_NEEDS_CPU_FLUSHES 5
static DEFINE_PER_CPU(int, rcu_dyntick_drain);
static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
/*
* Check to see if any future RCU-related work will need to be done
* by the current CPU, even if none need be done immediately, returning
* 1 if so. This function is part of the RCU implementation; it is -not-
* an exported member of the RCU API.
*
* Because we are not supporting preemptible RCU, attempt to accelerate
* any current grace periods so that RCU no longer needs this CPU, but
* only if all other CPUs are already in dynticks-idle mode. This will
* allow the CPU cores to be powered down immediately, as opposed to after
* waiting many milliseconds for grace periods to elapse.
*
* Because it is not legal to invoke rcu_process_callbacks() with irqs
* disabled, we do one pass of force_quiescent_state(), then do a
* raise_softirq() to cause rcu_process_callbacks() to be invoked later.
* The per-cpu rcu_dyntick_drain variable controls the sequencing.
*/
int rcu_needs_cpu(int cpu)
{
int c = 0;
int thatcpu;
/* Don't bother unless we are the last non-dyntick-idle CPU. */
for_each_cpu_not(thatcpu, nohz_cpu_mask)
if (thatcpu != cpu) {
per_cpu(rcu_dyntick_drain, cpu) = 0;
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
return rcu_needs_cpu_quick_check(cpu);
}
/* Check and update the rcu_dyntick_drain sequencing. */
if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
/* First time through, initialize the counter. */
per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
} else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
/* We have hit the limit, so time to give up. */
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
return rcu_needs_cpu_quick_check(cpu);
}
/* Do one step pushing remaining RCU callbacks through. */
if (per_cpu(rcu_sched_data, cpu).nxtlist) {
rcu_sched_qs(cpu);
force_quiescent_state(&rcu_sched_state, 0);
c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
}
if (per_cpu(rcu_bh_data, cpu).nxtlist) {
rcu_bh_qs(cpu);
force_quiescent_state(&rcu_bh_state, 0);
c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
}
/* If RCU callbacks are still pending, RCU still needs this CPU. */
if (c) {
raise_softirq(RCU_SOFTIRQ);
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
}
return c;
}
/*
* Check to see if we need to continue a callback-flush operations to
* allow the last CPU to enter dyntick-idle mode.
*/
static void rcu_needs_cpu_flush(void)
{
int cpu = smp_processor_id();
unsigned long flags;
if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
return;
local_irq_save(flags);
(void)rcu_needs_cpu(cpu);
local_irq_restore(flags);
}
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */

View file

@ -50,7 +50,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
{
if (!rdp->beenonline)
return;
seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d",
seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? '!' : ' ',
rdp->completed, rdp->gpnum,
@ -105,7 +105,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
{
if (!rdp->beenonline)
return;
seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d",
seq_printf(m, "%d,%s,%lu,%lu,%d,%lu,%d",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"",
rdp->completed, rdp->gpnum,
@ -155,13 +155,13 @@ static const struct file_operations rcudata_csv_fops = {
static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
{
long gpnum;
unsigned long gpnum;
int level = 0;
int phase;
struct rcu_node *rnp;
gpnum = rsp->gpnum;
seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x "
seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x "
"nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n",
rsp->completed, gpnum, rsp->signaled,
(long)(rsp->jiffies_force_qs - jiffies),
@ -215,12 +215,12 @@ static const struct file_operations rcuhier_fops = {
static int show_rcugp(struct seq_file *m, void *unused)
{
#ifdef CONFIG_TREE_PREEMPT_RCU
seq_printf(m, "rcu_preempt: completed=%ld gpnum=%ld\n",
seq_printf(m, "rcu_preempt: completed=%ld gpnum=%lu\n",
rcu_preempt_state.completed, rcu_preempt_state.gpnum);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
seq_printf(m, "rcu_sched: completed=%ld gpnum=%ld\n",
seq_printf(m, "rcu_sched: completed=%ld gpnum=%lu\n",
rcu_sched_state.completed, rcu_sched_state.gpnum);
seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n",
seq_printf(m, "rcu_bh: completed=%ld gpnum=%lu\n",
rcu_bh_state.completed, rcu_bh_state.gpnum);
return 0;
}

View file

@ -188,6 +188,36 @@ static int __release_resource(struct resource *old)
return -EINVAL;
}
static void __release_child_resources(struct resource *r)
{
struct resource *tmp, *p;
resource_size_t size;
p = r->child;
r->child = NULL;
while (p) {
tmp = p;
p = p->sibling;
tmp->parent = NULL;
tmp->sibling = NULL;
__release_child_resources(tmp);
printk(KERN_DEBUG "release child resource %pR\n", tmp);
/* need to restore size, and keep flags */
size = resource_size(tmp);
tmp->start = 0;
tmp->end = size - 1;
}
}
void release_child_resources(struct resource *r)
{
write_lock(&resource_lock);
__release_child_resources(r);
write_unlock(&resource_lock);
}
/**
* request_resource - request and reserve an I/O or memory resource
* @root: root resource descriptor
@ -297,14 +327,29 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
#endif
static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
{
return 1;
}
/*
* This generic page_is_ram() returns true if specified address is
* registered as "System RAM" in iomem_resource list.
*/
int __weak page_is_ram(unsigned long pfn)
{
return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
}
/*
* Find empty slot in the resource tree given range and alignment.
*/
static int find_resource(struct resource *root, struct resource *new,
resource_size_t size, resource_size_t min,
resource_size_t max, resource_size_t align,
void (*alignf)(void *, struct resource *,
resource_size_t, resource_size_t),
resource_size_t (*alignf)(void *,
const struct resource *,
resource_size_t,
resource_size_t),
void *alignf_data)
{
struct resource *this = root->child;
@ -330,7 +375,7 @@ static int find_resource(struct resource *root, struct resource *new,
tmp.end = max;
tmp.start = ALIGN(tmp.start, align);
if (alignf)
alignf(alignf_data, &tmp, size, align);
tmp.start = alignf(alignf_data, &tmp, size, align);
if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) {
new->start = tmp.start;
new->end = tmp.start + size - 1;
@ -358,8 +403,10 @@ static int find_resource(struct resource *root, struct resource *new,
int allocate_resource(struct resource *root, struct resource *new,
resource_size_t size, resource_size_t min,
resource_size_t max, resource_size_t align,
void (*alignf)(void *, struct resource *,
resource_size_t, resource_size_t),
resource_size_t (*alignf)(void *,
const struct resource *,
resource_size_t,
resource_size_t),
void *alignf_data)
{
int err;

File diff suppressed because it is too large Load diff

View file

@ -47,9 +47,7 @@ static int convert_prio(int prio)
}
#define for_each_cpupri_active(array, idx) \
for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES); \
idx < CPUPRI_NR_PRIORITIES; \
idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1))
for_each_bit(idx, array, CPUPRI_NR_PRIORITIES)
/**
* cpupri_find - find the best (lowest-pri) CPU in the system

File diff suppressed because it is too large Load diff

View file

@ -44,24 +44,6 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
}
#ifdef CONFIG_SMP
static unsigned long
load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
{
return 0;
}
static int
move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
return 0;
}
#endif
static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
{
}
@ -97,7 +79,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
check_preempt_curr(rq, p, 0);
}
unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
{
return 0;
}
@ -119,9 +101,6 @@ static const struct sched_class idle_sched_class = {
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_idle,
.load_balance = load_balance_idle,
.move_one_task = move_one_task_idle,
#endif
.set_curr_task = set_curr_task_idle,

View file

@ -194,17 +194,20 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
return rt_se->my_q;
}
static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
int this_cpu = smp_processor_id();
struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
struct sched_rt_entity *rt_se = rt_rq->rt_se;
struct sched_rt_entity *rt_se;
rt_se = rt_rq->tg->rt_se[this_cpu];
if (rt_rq->rt_nr_running) {
if (rt_se && !on_rt_rq(rt_se))
enqueue_rt_entity(rt_se);
enqueue_rt_entity(rt_se, false);
if (rt_rq->highest_prio.curr < curr->prio)
resched_task(curr);
}
@ -212,7 +215,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
struct sched_rt_entity *rt_se = rt_rq->rt_se;
int this_cpu = smp_processor_id();
struct sched_rt_entity *rt_se;
rt_se = rt_rq->tg->rt_se[this_cpu];
if (rt_se && on_rt_rq(rt_se))
dequeue_rt_entity(rt_se);
@ -803,7 +809,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
dec_rt_group(rt_se, rt_rq);
}
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
@ -819,7 +825,10 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
return;
list_add_tail(&rt_se->run_list, queue);
if (head)
list_add(&rt_se->run_list, queue);
else
list_add_tail(&rt_se->run_list, queue);
__set_bit(rt_se_prio(rt_se), array->bitmap);
inc_rt_tasks(rt_se, rt_rq);
@ -856,11 +865,11 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
}
}
static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
dequeue_rt_stack(rt_se);
for_each_sched_rt_entity(rt_se)
__enqueue_rt_entity(rt_se);
__enqueue_rt_entity(rt_se, head);
}
static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
@ -871,21 +880,22 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
struct rt_rq *rt_rq = group_rt_rq(rt_se);
if (rt_rq && rt_rq->rt_nr_running)
__enqueue_rt_entity(rt_se);
__enqueue_rt_entity(rt_se, false);
}
}
/*
* Adding/removing a task to/from a priority array:
*/
static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
static void
enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, bool head)
{
struct sched_rt_entity *rt_se = &p->rt;
if (wakeup)
rt_se->timeout = 0;
enqueue_rt_entity(rt_se);
enqueue_rt_entity(rt_se, head);
if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
@ -1481,24 +1491,6 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
push_rt_tasks(rq);
}
static unsigned long
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
{
/* don't touch RT tasks */
return 0;
}
static int
move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
/* don't touch RT tasks */
return 0;
}
static void set_cpus_allowed_rt(struct task_struct *p,
const struct cpumask *new_mask)
{
@ -1721,7 +1713,7 @@ static void set_curr_task_rt(struct rq *rq)
dequeue_pushable_task(rq, p);
}
unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
/*
* Time slice is 0 for SCHED_FIFO tasks
@ -1746,8 +1738,6 @@ static const struct sched_class rt_sched_class = {
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_rt,
.load_balance = load_balance_rt,
.move_one_task = move_one_task_rt,
.set_cpus_allowed = set_cpus_allowed_rt,
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,

View file

@ -12,8 +12,6 @@
#include <linux/smp.h>
#include <linux/cpu.h>
static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
static struct {
struct list_head queue;
raw_spinlock_t lock;
@ -33,12 +31,14 @@ struct call_function_data {
cpumask_var_t cpumask;
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
struct call_single_queue {
struct list_head list;
raw_spinlock_t lock;
};
static DEFINE_PER_CPU(struct call_function_data, cfd_data);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue);
static int
hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
@ -256,7 +256,7 @@ void generic_smp_call_function_single_interrupt(void)
}
}
static DEFINE_PER_CPU(struct call_single_data, csd_data);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
/*
* smp_call_function_single - Run a function on a specific CPU

View file

@ -34,6 +34,30 @@
#include <linux/smp.h>
#include <linux/srcu.h>
static int init_srcu_struct_fields(struct srcu_struct *sp)
{
sp->completed = 0;
mutex_init(&sp->mutex);
sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
return sp->per_cpu_ref ? 0 : -ENOMEM;
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
int __init_srcu_struct(struct srcu_struct *sp, const char *name,
struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* Don't re-initialize a lock while it is held. */
debug_check_no_locks_freed((void *)sp, sizeof(*sp));
lockdep_init_map(&sp->dep_map, name, key, 0);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
return init_srcu_struct_fields(sp);
}
EXPORT_SYMBOL_GPL(__init_srcu_struct);
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/**
* init_srcu_struct - initialize a sleep-RCU structure
* @sp: structure to initialize.
@ -44,13 +68,12 @@
*/
int init_srcu_struct(struct srcu_struct *sp)
{
sp->completed = 0;
mutex_init(&sp->mutex);
sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
return (sp->per_cpu_ref ? 0 : -ENOMEM);
return init_srcu_struct_fields(sp);
}
EXPORT_SYMBOL_GPL(init_srcu_struct);
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/*
* srcu_readers_active_idx -- returns approximate number of readers
* active on the specified rank of per-CPU counters.
@ -100,15 +123,12 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
/**
* srcu_read_lock - register a new reader for an SRCU-protected structure.
* @sp: srcu_struct in which to register the new reader.
*
/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct. Must be called from process context.
* Returns an index that must be passed to the matching srcu_read_unlock().
*/
int srcu_read_lock(struct srcu_struct *sp)
int __srcu_read_lock(struct srcu_struct *sp)
{
int idx;
@ -120,31 +140,27 @@ int srcu_read_lock(struct srcu_struct *sp)
preempt_enable();
return idx;
}
EXPORT_SYMBOL_GPL(srcu_read_lock);
EXPORT_SYMBOL_GPL(__srcu_read_lock);
/**
* srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
* @sp: srcu_struct in which to unregister the old reader.
* @idx: return value from corresponding srcu_read_lock().
*
/*
* Removes the count for the old reader from the appropriate per-CPU
* element of the srcu_struct. Note that this may well be a different
* CPU than that which was incremented by the corresponding srcu_read_lock().
* Must be called from process context.
*/
void srcu_read_unlock(struct srcu_struct *sp, int idx)
void __srcu_read_unlock(struct srcu_struct *sp, int idx)
{
preempt_disable();
srcu_barrier(); /* ensure compiler won't misorder critical section. */
per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
preempt_enable();
}
EXPORT_SYMBOL_GPL(srcu_read_unlock);
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
/*
* Helper function for synchronize_srcu() and synchronize_srcu_expedited().
*/
void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
{
int idx;

View file

@ -571,11 +571,6 @@ static int set_user(struct cred *new)
if (!new_user)
return -EAGAIN;
if (!task_can_switch_user(new_user, current)) {
free_uid(new_user);
return -EINVAL;
}
if (atomic_read(&new_user->processes) >=
current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
new_user != INIT_USER) {

View file

@ -1441,7 +1441,7 @@ static struct ctl_table fs_table[] = {
};
static struct ctl_table debug_table[] = {
#if defined(CONFIG_X86) || defined(CONFIG_PPC)
#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC)
{
.procname = "exception-trace",
.data = &show_unhandled_signals,

View file

@ -46,15 +46,13 @@ static struct genl_family family = {
.maxattr = TASKSTATS_CMD_ATTR_MAX,
};
static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1]
__read_mostly = {
static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = {
[TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 },
[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
static struct nla_policy
cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] __read_mostly = {
static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] = {
[CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
};

View file

@ -452,6 +452,18 @@ static inline int clocksource_watchdog_kthread(void *data) { return 0; }
#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
/**
* clocksource_suspend - suspend the clocksource(s)
*/
void clocksource_suspend(void)
{
struct clocksource *cs;
list_for_each_entry_reverse(cs, &clocksource_list, list)
if (cs->suspend)
cs->suspend(cs);
}
/**
* clocksource_resume - resume the clocksource(s)
*/
@ -461,7 +473,7 @@ void clocksource_resume(void)
list_for_each_entry(cs, &clocksource_list, list)
if (cs->resume)
cs->resume();
cs->resume(cs);
clocksource_resume_watchdog();
}

View file

@ -58,10 +58,10 @@ static s64 time_offset;
static long time_constant = 2;
/* maximum error (usecs): */
long time_maxerror = NTP_PHASE_LIMIT;
static long time_maxerror = NTP_PHASE_LIMIT;
/* estimated error (usecs): */
long time_esterror = NTP_PHASE_LIMIT;
static long time_esterror = NTP_PHASE_LIMIT;
/* frequency offset (scaled nsecs/secs): */
static s64 time_freq;
@ -142,11 +142,11 @@ static void ntp_update_offset(long offset)
* Select how the frequency is to be controlled
* and in which mode (PLL or FLL).
*/
secs = xtime.tv_sec - time_reftime;
secs = get_seconds() - time_reftime;
if (unlikely(time_status & STA_FREQHOLD))
secs = 0;
time_reftime = xtime.tv_sec;
time_reftime = get_seconds();
offset64 = offset;
freq_adj = (offset64 * secs) <<
@ -368,7 +368,7 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
* reference time to current time.
*/
if (!(time_status & STA_PLL) && (txc->status & STA_PLL))
time_reftime = xtime.tv_sec;
time_reftime = get_seconds();
/* only set allowed bits */
time_status &= STA_RONLY;

View file

@ -622,6 +622,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
write_sequnlock_irqrestore(&xtime_lock, flags);
clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
clocksource_suspend();
return 0;
}

View file

@ -51,7 +51,9 @@ endif
obj-$(CONFIG_EVENT_TRACING) += trace_events.o
obj-$(CONFIG_EVENT_TRACING) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o

View file

@ -540,9 +540,10 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (ret)
return ret;
if (copy_to_user(arg, &buts, sizeof(buts)))
if (copy_to_user(arg, &buts, sizeof(buts))) {
blk_trace_remove(q);
return -EFAULT;
}
return 0;
}
EXPORT_SYMBOL_GPL(blk_trace_setup);

View file

@ -22,7 +22,6 @@
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/ctype.h>
@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records;
} \
}
#ifdef CONFIG_KPROBES
static int frozen_record_count;
static inline void freeze_record(struct dyn_ftrace *rec)
{
if (!(rec->flags & FTRACE_FL_FROZEN)) {
rec->flags |= FTRACE_FL_FROZEN;
frozen_record_count++;
}
}
static inline void unfreeze_record(struct dyn_ftrace *rec)
{
if (rec->flags & FTRACE_FL_FROZEN) {
rec->flags &= ~FTRACE_FL_FROZEN;
frozen_record_count--;
}
}
static inline int record_frozen(struct dyn_ftrace *rec)
{
return rec->flags & FTRACE_FL_FROZEN;
}
#else
# define freeze_record(rec) ({ 0; })
# define unfreeze_record(rec) ({ 0; })
# define record_frozen(rec) ({ 0; })
#endif /* CONFIG_KPROBES */
static void ftrace_free_rec(struct dyn_ftrace *rec)
{
rec->freelist = ftrace_free_records;
@ -1025,6 +994,21 @@ static void ftrace_bug(int failed, unsigned long ip)
}
/* Return 1 if the address range is reserved for ftrace */
int ftrace_text_reserved(void *start, void *end)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
do_for_each_ftrace_rec(pg, rec) {
if (rec->ip <= (unsigned long)end &&
rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
return 1;
} while_for_each_ftrace_rec();
return 0;
}
static int
__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
{
@ -1076,14 +1060,6 @@ static void ftrace_replace_code(int enable)
!(rec->flags & FTRACE_FL_CONVERTED))
continue;
/* ignore updates to this record's mcount site */
if (get_kprobe((void *)rec->ip)) {
freeze_record(rec);
continue;
} else {
unfreeze_record(rec);
}
failed = __ftrace_replace_code(rec, enable);
if (failed) {
rec->flags |= FTRACE_FL_FAILED;

View file

@ -1703,6 +1703,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
ftrace_enable_cpu();
iter->leftover = 0;
for (p = iter; p && l < *pos; p = s_next(m, p, &l))
;

View file

@ -6,14 +6,12 @@
*/
#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"
char *perf_trace_buf;
EXPORT_SYMBOL_GPL(perf_trace_buf);
char *perf_trace_buf_nmi;
EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
static char *perf_trace_buf;
static char *perf_trace_buf_nmi;
typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id)
}
mutex_unlock(&event_mutex);
}
__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
int *rctxp, unsigned long *irq_flags)
{
struct trace_entry *entry;
char *trace_buf, *raw_data;
int pc, cpu;
pc = preempt_count();
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(*irq_flags);
*rctxp = perf_swevent_get_recursion_context();
if (*rctxp < 0)
goto err_recursion;
cpu = smp_processor_id();
if (in_nmi())
trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto err;
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
entry = (struct trace_entry *)raw_data;
tracing_generic_entry_update(entry, *irq_flags, pc);
entry->type = type;
return raw_data;
err:
perf_swevent_put_recursion_context(*rctxp);
err_recursion:
local_irq_restore(*irq_flags);
return NULL;
}
EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);

View file

@ -1371,7 +1371,7 @@ out_unlock:
return err;
}
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
void ftrace_profile_free_filter(struct perf_event *event)
{
@ -1439,5 +1439,5 @@ out_unlock:
return err;
}
#endif /* CONFIG_EVENT_PROFILE */
#endif /* CONFIG_PERF_EVENTS */

View file

@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
return retval;
}
static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
{
return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
}
static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
void *dummy)
{
@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
{
int ret = -EINVAL;
if (ff->func == fetch_argument)
ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
else if (ff->func == fetch_register) {
if (ff->func == fetch_register) {
const char *name;
name = regs_query_register_name((unsigned int)((long)ff->data));
ret = snprintf(buf, n, "%%%s", name);
@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
}
} else
ret = -EINVAL;
} else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
ret = strict_strtoul(arg + 3, 10, &param);
if (ret || param > PARAM_MAX_ARGS)
ret = -EINVAL;
else {
ff->func = fetch_argument;
ff->data = (void *)param;
}
} else
ret = -EINVAL;
return ret;
@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv)
* - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
* - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
* Fetch args:
* $argN : fetch Nth of function argument. (N:0-)
* $retval : fetch return value
* $stack : fetch stack address
* $stackN : fetch Nth of stack (N:0-)
@ -958,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = {
};
/* Kprobe handler */
static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct kprobe_trace_entry *entry;
@ -978,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
irq_flags, pc);
if (!event)
return 0;
return;
entry = ring_buffer_event_data(event);
entry->nargs = tp->nr_args;
@ -988,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
return 0;
}
/* Kretprobe handler */
static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@ -1011,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
irq_flags, pc);
if (!event)
return 0;
return;
entry = ring_buffer_event_data(event);
entry->nargs = tp->nr_args;
@ -1022,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
return 0;
}
/* Event entry printers */
@ -1230,137 +1211,67 @@ static int set_print_fmt(struct trace_probe *tp)
return 0;
}
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */
static __kprobes int kprobe_profile_func(struct kprobe *kp,
static __kprobes void kprobe_profile_func(struct kprobe *kp,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct ftrace_event_call *call = &tp->call;
struct kprobe_trace_entry *entry;
struct trace_entry *ent;
int size, __size, i, pc, __cpu;
int size, __size, i;
unsigned long irq_flags;
char *trace_buf;
char *raw_data;
int rctx;
pc = preempt_count();
__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
"profile buffer not large enough"))
return 0;
return;
/*
* Protect the non nmi buffer
* This also protects the rcu read side
*/
local_irq_save(irq_flags);
entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
if (!entry)
return;
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
__cpu = smp_processor_id();
if (in_nmi())
trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, __cpu);
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
entry = (struct kprobe_trace_entry *)raw_data;
ent = &entry->ent;
tracing_generic_entry_update(ent, irq_flags, pc);
ent->type = call->id;
entry->nargs = tp->nr_args;
entry->ip = (unsigned long)kp->addr;
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tp_event(call->id, entry->ip, 1, entry, size);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(irq_flags);
return 0;
ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
}
/* Kretprobe profile handler */
static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
struct ftrace_event_call *call = &tp->call;
struct kretprobe_trace_entry *entry;
struct trace_entry *ent;
int size, __size, i, pc, __cpu;
int size, __size, i;
unsigned long irq_flags;
char *trace_buf;
char *raw_data;
int rctx;
pc = preempt_count();
__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
"profile buffer not large enough"))
return 0;
return;
/*
* Protect the non nmi buffer
* This also protects the rcu read side
*/
local_irq_save(irq_flags);
entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
if (!entry)
return;
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
__cpu = smp_processor_id();
if (in_nmi())
trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, __cpu);
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
entry = (struct kretprobe_trace_entry *)raw_data;
ent = &entry->ent;
tracing_generic_entry_update(ent, irq_flags, pc);
ent->type = call->id;
entry->nargs = tp->nr_args;
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(irq_flags);
return 0;
ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
}
static int probe_profile_enable(struct ftrace_event_call *call)
@ -1388,7 +1299,7 @@ static void probe_profile_disable(struct ftrace_event_call *call)
disable_kprobe(&tp->rp.kp);
}
}
#endif /* CONFIG_EVENT_PROFILE */
#endif /* CONFIG_PERF_EVENTS */
static __kprobes
@ -1398,10 +1309,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
if (tp->flags & TP_FLAG_TRACE)
kprobe_trace_func(kp, regs);
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
kprobe_profile_func(kp, regs);
#endif /* CONFIG_EVENT_PROFILE */
#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
@ -1412,10 +1323,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
if (tp->flags & TP_FLAG_TRACE)
kretprobe_trace_func(ri, regs);
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
kretprobe_profile_func(ri, regs);
#endif /* CONFIG_EVENT_PROFILE */
#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
@ -1446,7 +1357,7 @@ static int register_probe_event(struct trace_probe *tp)
call->regfunc = probe_event_enable;
call->unregfunc = probe_event_disable;
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
call->profile_enable = probe_profile_enable;
call->profile_disable = probe_profile_disable;
#endif
@ -1507,28 +1418,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3,
static __init int kprobe_trace_self_tests_init(void)
{
int ret;
int ret, warn = 0;
int (*target)(int, int, int, int, int, int);
struct trace_probe *tp;
target = kprobe_trace_selftest_target;
pr_info("Testing kprobe tracing: ");
ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
"$arg1 $arg2 $arg3 $arg4 $stack $stack0");
if (WARN_ON_ONCE(ret))
pr_warning("error enabling function entry\n");
"$stack $stack0 +0($stack)");
if (WARN_ON_ONCE(ret)) {
pr_warning("error on probing function entry.\n");
warn++;
} else {
/* Enable trace point */
tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
if (WARN_ON_ONCE(tp == NULL)) {
pr_warning("error on getting new probe.\n");
warn++;
} else
probe_event_enable(&tp->call);
}
ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
"$retval");
if (WARN_ON_ONCE(ret))
pr_warning("error enabling function return\n");
if (WARN_ON_ONCE(ret)) {
pr_warning("error on probing function return.\n");
warn++;
} else {
/* Enable trace point */
tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
if (WARN_ON_ONCE(tp == NULL)) {
pr_warning("error on getting new probe.\n");
warn++;
} else
probe_event_enable(&tp->call);
}
if (warn)
goto end;
ret = target(1, 2, 3, 4, 5, 6);
cleanup_all_probes();
ret = command_trace_probe("-:testprobe");
if (WARN_ON_ONCE(ret)) {
pr_warning("error on deleting a probe.\n");
warn++;
}
pr_cont("OK\n");
ret = command_trace_probe("-:testprobe2");
if (WARN_ON_ONCE(ret)) {
pr_warning("error on deleting a probe.\n");
warn++;
}
end:
cleanup_all_probes();
if (warn)
pr_cont("NG: Some tests are failed. Please check them.\n");
else
pr_cont("OK\n");
return 0;
}

View file

@ -426,7 +426,7 @@ int __init init_ftrace_syscalls(void)
}
core_initcall(init_ftrace_syscalls);
#ifdef CONFIG_EVENT_PROFILE
#ifdef CONFIG_PERF_EVENTS
static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
@ -438,12 +438,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
unsigned long flags;
char *trace_buf;
char *raw_data;
int syscall_nr;
int rctx;
int size;
int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@ -462,37 +459,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
"profile buffer not large enough"))
return;
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(flags);
rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
sys_data->enter_event->id, &rctx, &flags);
if (!rec)
return;
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
cpu = smp_processor_id();
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
rec = (struct syscall_trace_enter *) raw_data;
tracing_generic_entry_update(&rec->ent, 0, 0);
rec->ent.type = sys_data->enter_event->id;
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(flags);
ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
}
int prof_sysenter_enable(struct ftrace_event_call *call)
@ -536,11 +511,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
struct syscall_trace_exit *rec;
unsigned long flags;
int syscall_nr;
char *trace_buf;
char *raw_data;
int rctx;
int size;
int cpu;
syscall_nr = syscall_get_nr(current, regs);
if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@ -562,38 +534,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
"exit event has grown above profile buffer size"))
return;
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(flags);
rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
sys_data->exit_event->id, &rctx, &flags);
if (!rec)
return;
rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
goto end_recursion;
cpu = smp_processor_id();
trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
rec = (struct syscall_trace_exit *)raw_data;
tracing_generic_entry_update(&rec->ent, 0, 0);
rec->ent.type = sys_data->exit_event->id;
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
end:
perf_swevent_put_recursion_context(rctx);
end_recursion:
local_irq_restore(flags);
ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
}
int prof_sysexit_enable(struct ftrace_event_call *call)
@ -631,6 +580,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call)
mutex_unlock(&syscall_trace_lock);
}
#endif
#endif /* CONFIG_PERF_EVENTS */

View file

@ -56,9 +56,6 @@ struct user_struct root_user = {
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
.user_ns = &init_user_ns,
#ifdef CONFIG_USER_SCHED
.tg = &init_task_group,
#endif
};
/*
@ -75,268 +72,6 @@ static void uid_hash_remove(struct user_struct *up)
put_user_ns(up->user_ns);
}
#ifdef CONFIG_USER_SCHED
static void sched_destroy_user(struct user_struct *up)
{
sched_destroy_group(up->tg);
}
static int sched_create_user(struct user_struct *up)
{
int rc = 0;
up->tg = sched_create_group(&root_task_group);
if (IS_ERR(up->tg))
rc = -ENOMEM;
set_tg_uid(up);
return rc;
}
#else /* CONFIG_USER_SCHED */
static void sched_destroy_user(struct user_struct *up) { }
static int sched_create_user(struct user_struct *up) { return 0; }
#endif /* CONFIG_USER_SCHED */
#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
{
struct user_struct *user;
struct hlist_node *h;
hlist_for_each_entry(user, h, hashent, uidhash_node) {
if (user->uid == uid) {
/* possibly resurrect an "almost deleted" object */
if (atomic_inc_return(&user->__count) == 1)
cancel_delayed_work(&user->work);
return user;
}
}
return NULL;
}
static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
static DEFINE_MUTEX(uids_mutex);
static inline void uids_mutex_lock(void)
{
mutex_lock(&uids_mutex);
}
static inline void uids_mutex_unlock(void)
{
mutex_unlock(&uids_mutex);
}
/* uid directory attributes */
#ifdef CONFIG_FAIR_GROUP_SCHED
static ssize_t cpu_shares_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
{
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
return sprintf(buf, "%lu\n", sched_group_shares(up->tg));
}
static ssize_t cpu_shares_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t size)
{
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
unsigned long shares;
int rc;
sscanf(buf, "%lu", &shares);
rc = sched_group_set_shares(up->tg, shares);
return (rc ? rc : size);
}
static struct kobj_attribute cpu_share_attr =
__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
#endif
#ifdef CONFIG_RT_GROUP_SCHED
static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
{
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
}
static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t size)
{
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
unsigned long rt_runtime;
int rc;
sscanf(buf, "%ld", &rt_runtime);
rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
return (rc ? rc : size);
}
static struct kobj_attribute cpu_rt_runtime_attr =
__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
static ssize_t cpu_rt_period_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
{
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
return sprintf(buf, "%lu\n", sched_group_rt_period(up->tg));
}
static ssize_t cpu_rt_period_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t size)
{
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
unsigned long rt_period;
int rc;
sscanf(buf, "%lu", &rt_period);
rc = sched_group_set_rt_period(up->tg, rt_period);
return (rc ? rc : size);
}
static struct kobj_attribute cpu_rt_period_attr =
__ATTR(cpu_rt_period, 0644, cpu_rt_period_show, cpu_rt_period_store);
#endif
/* default attributes per uid directory */
static struct attribute *uids_attributes[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
&cpu_share_attr.attr,
#endif
#ifdef CONFIG_RT_GROUP_SCHED
&cpu_rt_runtime_attr.attr,
&cpu_rt_period_attr.attr,
#endif
NULL
};
/* the lifetime of user_struct is not managed by the core (now) */
static void uids_release(struct kobject *kobj)
{
return;
}
static struct kobj_type uids_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.default_attrs = uids_attributes,
.release = uids_release,
};
/*
* Create /sys/kernel/uids/<uid>/cpu_share file for this user
* We do not create this file for users in a user namespace (until
* sysfs tagging is implemented).
*
* See Documentation/scheduler/sched-design-CFS.txt for ramifications.
*/
static int uids_user_create(struct user_struct *up)
{
struct kobject *kobj = &up->kobj;
int error;
memset(kobj, 0, sizeof(struct kobject));
if (up->user_ns != &init_user_ns)
return 0;
kobj->kset = uids_kset;
error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
if (error) {
kobject_put(kobj);
goto done;
}
kobject_uevent(kobj, KOBJ_ADD);
done:
return error;
}
/* create these entries in sysfs:
* "/sys/kernel/uids" directory
* "/sys/kernel/uids/0" directory (for root user)
* "/sys/kernel/uids/0/cpu_share" file (for root user)
*/
int __init uids_sysfs_init(void)
{
uids_kset = kset_create_and_add("uids", NULL, kernel_kobj);
if (!uids_kset)
return -ENOMEM;
return uids_user_create(&root_user);
}
/* delayed work function to remove sysfs directory for a user and free up
* corresponding structures.
*/
static void cleanup_user_struct(struct work_struct *w)
{
struct user_struct *up = container_of(w, struct user_struct, work.work);
unsigned long flags;
int remove_user = 0;
/* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
* atomic.
*/
uids_mutex_lock();
spin_lock_irqsave(&uidhash_lock, flags);
if (atomic_read(&up->__count) == 0) {
uid_hash_remove(up);
remove_user = 1;
}
spin_unlock_irqrestore(&uidhash_lock, flags);
if (!remove_user)
goto done;
if (up->user_ns == &init_user_ns) {
kobject_uevent(&up->kobj, KOBJ_REMOVE);
kobject_del(&up->kobj);
kobject_put(&up->kobj);
}
sched_destroy_user(up);
key_put(up->uid_keyring);
key_put(up->session_keyring);
kmem_cache_free(uid_cachep, up);
done:
uids_mutex_unlock();
}
/* IRQs are disabled and uidhash_lock is held upon function entry.
* IRQ state (as stored in flags) is restored and uidhash_lock released
* upon function exit.
*/
static void free_user(struct user_struct *up, unsigned long flags)
{
INIT_DELAYED_WORK(&up->work, cleanup_user_struct);
schedule_delayed_work(&up->work, msecs_to_jiffies(1000));
spin_unlock_irqrestore(&uidhash_lock, flags);
}
#else /* CONFIG_USER_SCHED && CONFIG_SYSFS */
static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
{
struct user_struct *user;
@ -352,11 +87,6 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
return NULL;
}
int uids_sysfs_init(void) { return 0; }
static inline int uids_user_create(struct user_struct *up) { return 0; }
static inline void uids_mutex_lock(void) { }
static inline void uids_mutex_unlock(void) { }
/* IRQs are disabled and uidhash_lock is held upon function entry.
* IRQ state (as stored in flags) is restored and uidhash_lock released
* upon function exit.
@ -365,32 +95,11 @@ static void free_user(struct user_struct *up, unsigned long flags)
{
uid_hash_remove(up);
spin_unlock_irqrestore(&uidhash_lock, flags);
sched_destroy_user(up);
key_put(up->uid_keyring);
key_put(up->session_keyring);
kmem_cache_free(uid_cachep, up);
}
#endif
#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED)
/*
* We need to check if a setuid can take place. This function should be called
* before successfully completing the setuid.
*/
int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
{
return sched_rt_can_attach(up->tg, tsk);
}
#else
int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
{
return 1;
}
#endif
/*
* Locate the user_struct for the passed UID. If found, take a ref on it. The
* caller must undo that ref with free_uid().
@ -431,8 +140,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
/* Make uid_hash_find() + uids_user_create() + uid_hash_insert()
* atomic.
*/
uids_mutex_lock();
spin_lock_irq(&uidhash_lock);
up = uid_hash_find(uid, hashent);
spin_unlock_irq(&uidhash_lock);
@ -445,14 +152,8 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
new->uid = uid;
atomic_set(&new->__count, 1);
if (sched_create_user(new) < 0)
goto out_free_user;
new->user_ns = get_user_ns(ns);
if (uids_user_create(new))
goto out_destoy_sched;
/*
* Before adding this, check whether we raced
* on adding the same user already..
@ -475,17 +176,11 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
spin_unlock_irq(&uidhash_lock);
}
uids_mutex_unlock();
return up;
out_destoy_sched:
sched_destroy_user(new);
put_user_ns(new->user_ns);
out_free_user:
kmem_cache_free(uid_cachep, new);
out_unlock:
uids_mutex_unlock();
return NULL;
}