Merge branch 'tracing/urgent' into tracing/core

Merge reason: pick up both v2.6.30-rc1 [which includes tracing/urgent fixes]
and the current lineup of tracing/urgent fixes as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit 1cad1252ed
813 changed files with 74718 additions and 21547 deletions
|
|
@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
|
|||
obj-$(CONFIG_KPROBES) += kprobes.o
|
||||
obj-$(CONFIG_KGDB) += kgdb.o
|
||||
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
|
||||
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
|
||||
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
|
||||
obj-$(CONFIG_SECCOMP) += seccomp.o
|
||||
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
|
||||
|
|
|
|||
|
|
@ -923,6 +923,8 @@ NORET_TYPE void do_exit(long code)
|
|||
schedule();
|
||||
}
|
||||
|
||||
exit_irq_thread();
|
||||
|
||||
exit_signals(tsk); /* sets PF_EXITING */
|
||||
/*
|
||||
* tsk->flags are checked in the futex code to protect against
|
||||
|
|
|
|||
|
|
@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
|
|||
|
||||
tsk->min_flt = tsk->maj_flt = 0;
|
||||
tsk->nvcsw = tsk->nivcsw = 0;
|
||||
#ifdef CONFIG_DETECT_HUNG_TASK
|
||||
tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
|
||||
#endif
|
||||
|
||||
tsk->mm = NULL;
|
||||
tsk->active_mm = NULL;
|
||||
|
|
@ -797,6 +800,12 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
|
|||
sig->cputime_expires.virt_exp = cputime_zero;
|
||||
sig->cputime_expires.sched_exp = 0;
|
||||
|
||||
if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
|
||||
sig->cputime_expires.prof_exp =
|
||||
secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
|
||||
sig->cputimer.running = 1;
|
||||
}
|
||||
|
||||
/* The timer lists. */
|
||||
INIT_LIST_HEAD(&sig->cpu_timers[0]);
|
||||
INIT_LIST_HEAD(&sig->cpu_timers[1]);
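The new block above arms the group-wide CPU timer at fork time whenever RLIMIT_CPU is finite. For context, a hedged userspace sketch (standard POSIX API, not part of this patch) of how such a limit gets set in the first place; the 10/20 second values are arbitrary:

/* Userspace sketch: give this process (and its children) a CPU-time budget. */
#include <sys/resource.h>
#include <stdio.h>

int main(void)
{
	struct rlimit rl = { .rlim_cur = 10, .rlim_max = 20 };

	/*
	 * After this call the kernel keeps sig->cputimer running for the
	 * process group, so the soft limit can be enforced with SIGXCPU
	 * once 10 seconds of CPU time have been consumed.
	 */
	if (setrlimit(RLIMIT_CPU, &rl) != 0) {
		perror("setrlimit");
		return 1;
	}

	for (;;)
		;	/* burn CPU until SIGXCPU arrives */
}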
|
||||
|
|
@ -812,11 +821,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
|
|||
atomic_inc(&current->signal->live);
|
||||
return 0;
|
||||
}
|
||||
|
||||
sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
|
||||
|
||||
if (sig)
|
||||
posix_cpu_timers_init_group(sig);
|
||||
|
||||
tsk->signal = sig;
|
||||
if (!sig)
|
||||
return -ENOMEM;
|
||||
|
|
@ -856,6 +862,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
|
|||
memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
|
||||
task_unlock(current->group_leader);
|
||||
|
||||
posix_cpu_timers_init_group(sig);
|
||||
|
||||
acct_init_pacct(&sig->pacct);
|
||||
|
||||
tty_audit_fork(sig);
|
||||
|
|
@ -1032,11 +1040,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
|||
|
||||
p->default_timer_slack_ns = current->timer_slack_ns;
|
||||
|
||||
#ifdef CONFIG_DETECT_SOFTLOCKUP
|
||||
p->last_switch_count = 0;
|
||||
p->last_switch_timestamp = 0;
|
||||
#endif
|
||||
|
||||
task_io_accounting_init(&p->ioac);
|
||||
acct_clear_integrals(p);
|
||||
|
||||
|
|
|
|||
|
|
@ -883,7 +883,12 @@ retry_private:
|
|||
out_unlock:
|
||||
double_unlock_hb(hb1, hb2);
|
||||
|
||||
/* drop_futex_key_refs() must be called outside the spinlocks. */
|
||||
/*
|
||||
* drop_futex_key_refs() must be called outside the spinlocks. During
|
||||
* the requeue we moved futex_q's from the hash bucket at key1 to the
|
||||
* one at key2 and updated their key pointer. We no longer need to
|
||||
* hold the references to key1.
|
||||
*/
|
||||
while (--drop_count >= 0)
|
||||
drop_futex_key_refs(&key1);
|
||||
|
||||
|
|
|
|||
217
kernel/hung_task.c
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
/*
|
||||
* Detect Hung Task
|
||||
*
|
||||
* kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sysctl.h>
|
||||
|
||||
/*
|
||||
* The number of tasks checked:
|
||||
*/
|
||||
unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
|
||||
|
||||
/*
|
||||
* Limit number of tasks checked in a batch.
|
||||
*
|
||||
* This value controls the preemptibility of khungtaskd since preemption
|
||||
* is disabled during the critical section. It also controls the size of
|
||||
* the RCU grace period. So it needs to be upper-bound.
|
||||
*/
|
||||
#define HUNG_TASK_BATCHING 1024
|
||||
|
||||
/*
|
||||
* Zero means infinite timeout - no checking done:
|
||||
*/
|
||||
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
|
||||
|
||||
unsigned long __read_mostly sysctl_hung_task_warnings = 10;
|
||||
|
||||
static int __read_mostly did_panic;
|
||||
|
||||
static struct task_struct *watchdog_task;
|
||||
|
||||
/*
|
||||
* Should we panic (and reboot, if panic_timeout= is set) when a
|
||||
* hung task is detected:
|
||||
*/
|
||||
unsigned int __read_mostly sysctl_hung_task_panic =
|
||||
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
|
||||
|
||||
static int __init hung_task_panic_setup(char *str)
|
||||
{
|
||||
sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
|
||||
|
||||
return 1;
|
||||
}
|
||||
__setup("hung_task_panic=", hung_task_panic_setup);
|
||||
|
||||
static int
|
||||
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
|
||||
{
|
||||
did_panic = 1;
|
||||
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static struct notifier_block panic_block = {
|
||||
.notifier_call = hung_task_panic,
|
||||
};
|
||||
|
||||
static void check_hung_task(struct task_struct *t, unsigned long timeout)
|
||||
{
|
||||
unsigned long switch_count = t->nvcsw + t->nivcsw;
|
||||
|
||||
/*
|
||||
* Ensure the task is not frozen.
|
||||
* Also, a freshly created task which set its state to
* TASK_UNINTERRUPTIBLE but has never been switched out yet
* mustn't be checked.
|
||||
*/
|
||||
if (unlikely(t->flags & PF_FROZEN || !switch_count))
|
||||
return;
|
||||
|
||||
if (switch_count != t->last_switch_count) {
|
||||
t->last_switch_count = switch_count;
|
||||
return;
|
||||
}
|
||||
if (!sysctl_hung_task_warnings)
|
||||
return;
|
||||
sysctl_hung_task_warnings--;
|
||||
|
||||
/*
|
||||
* Ok, the task did not get scheduled for more than 2 minutes,
|
||||
* complain:
|
||||
*/
|
||||
printk(KERN_ERR "INFO: task %s:%d blocked for more than "
|
||||
"%ld seconds.\n", t->comm, t->pid, timeout);
|
||||
printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
|
||||
" disables this message.\n");
|
||||
sched_show_task(t);
|
||||
__debug_show_held_locks(t);
|
||||
|
||||
touch_nmi_watchdog();
|
||||
|
||||
if (sysctl_hung_task_panic)
|
||||
panic("hung_task: blocked tasks");
|
||||
}
|
||||
|
||||
/*
|
||||
* To avoid extending the RCU grace period for an unbounded amount of time,
|
||||
* periodically exit the critical section and enter a new one.
|
||||
*
|
||||
* For preemptible RCU it is sufficient to call rcu_read_unlock in order
* to exit the grace period. For classic RCU, a reschedule is required.
|
||||
*/
|
||||
static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
|
||||
{
|
||||
get_task_struct(g);
|
||||
get_task_struct(t);
|
||||
rcu_read_unlock();
|
||||
cond_resched();
|
||||
rcu_read_lock();
|
||||
put_task_struct(t);
|
||||
put_task_struct(g);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
|
||||
* a really long time (120 seconds). If that happens, print out
|
||||
* a warning.
|
||||
*/
|
||||
static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
||||
{
|
||||
int max_count = sysctl_hung_task_check_count;
|
||||
int batch_count = HUNG_TASK_BATCHING;
|
||||
struct task_struct *g, *t;
|
||||
|
||||
/*
|
||||
* If the system crashed already then all bets are off,
|
||||
* do not report extra hung tasks:
|
||||
*/
|
||||
if (test_taint(TAINT_DIE) || did_panic)
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
do_each_thread(g, t) {
|
||||
if (!--max_count)
|
||||
goto unlock;
|
||||
if (!--batch_count) {
|
||||
batch_count = HUNG_TASK_BATCHING;
|
||||
rcu_lock_break(g, t);
|
||||
/* Exit if t or g was unhashed during refresh. */
|
||||
if (t->state == TASK_DEAD || g->state == TASK_DEAD)
|
||||
goto unlock;
|
||||
}
|
||||
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
|
||||
if (t->state == TASK_UNINTERRUPTIBLE)
|
||||
check_hung_task(t, timeout);
|
||||
} while_each_thread(g, t);
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static unsigned long timeout_jiffies(unsigned long timeout)
|
||||
{
|
||||
/* timeout of 0 will disable the watchdog */
|
||||
return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Process updating of timeout sysctl
|
||||
*/
|
||||
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
|
||||
struct file *filp, void __user *buffer,
|
||||
size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
|
||||
|
||||
if (ret || !write)
|
||||
goto out;
|
||||
|
||||
wake_up_process(watchdog_task);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* kthread which checks for tasks stuck in D state
|
||||
*/
|
||||
static int watchdog(void *dummy)
|
||||
{
|
||||
set_user_nice(current, 0);
|
||||
|
||||
for ( ; ; ) {
|
||||
unsigned long timeout = sysctl_hung_task_timeout_secs;
|
||||
|
||||
while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
|
||||
timeout = sysctl_hung_task_timeout_secs;
|
||||
|
||||
check_hung_uninterruptible_tasks(timeout);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init hung_task_init(void)
|
||||
{
|
||||
atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
|
||||
watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
module_init(hung_task_init);
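The detector only complains about tasks that sit in TASK_UNINTERRUPTIBLE across two consecutive scans without a context switch. A minimal, hypothetical test module (not part of this commit) illustrating the kind of sleep that trips khungtaskd versus one it deliberately skips:

/* Hypothetical test module, for illustration only. */
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static struct task_struct *stuck;

static int stuck_fn(void *unused)
{
	/*
	 * Sleeping uninterruptibly for longer than
	 * sysctl_hung_task_timeout_secs (default 120s) without a context
	 * switch makes check_hung_task() print the "blocked for more
	 * than ... seconds" warning for this thread.
	 */
	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule_timeout(180 * HZ);

	/*
	 * A killable sleep is skipped by the detector: it compares
	 * t->state with "==" TASK_UNINTERRUPTIBLE, and TASK_KILLABLE
	 * ORs in TASK_WAKEKILL, so it does not match.
	 */
	set_current_state(TASK_KILLABLE);
	schedule_timeout(180 * HZ);
	return 0;
}

static int __init stuck_init(void)
{
	stuck = kthread_run(stuck_fn, NULL, "stuck_demo");
	return IS_ERR(stuck) ? PTR_ERR(stuck) : 0;
}
module_init(stuck_init);
MODULE_LICENSE("GPL");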
|
||||
|
|
@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
|
|||
}
|
||||
|
||||
/**
|
||||
* devm_request_irq - allocate an interrupt line for a managed device
|
||||
* devm_request_threaded_irq - allocate an interrupt line for a managed device
|
||||
* @dev: device to request interrupt for
|
||||
* @irq: Interrupt line to allocate
|
||||
* @handler: Function to be called when the IRQ occurs
|
||||
* @thread_fn: function to be called in a threaded interrupt context. NULL
|
||||
* for devices which handle everything in @handler
|
||||
* @irqflags: Interrupt type flags
|
||||
* @devname: An ascii name for the claiming device
|
||||
* @dev_id: A cookie passed back to the handler function
|
||||
|
|
@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
|
|||
* If an IRQ allocated with this function needs to be freed
|
||||
* separately, dev_free_irq() must be used.
|
||||
*/
|
||||
int devm_request_irq(struct device *dev, unsigned int irq,
|
||||
irq_handler_t handler, unsigned long irqflags,
|
||||
const char *devname, void *dev_id)
|
||||
int devm_request_threaded_irq(struct device *dev, unsigned int irq,
|
||||
irq_handler_t handler, irq_handler_t thread_fn,
|
||||
unsigned long irqflags, const char *devname,
|
||||
void *dev_id)
|
||||
{
|
||||
struct irq_devres *dr;
|
||||
int rc;
|
||||
|
|
@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq,
|
|||
if (!dr)
|
||||
return -ENOMEM;
|
||||
|
||||
rc = request_irq(irq, handler, irqflags, devname, dev_id);
|
||||
rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname,
|
||||
dev_id);
|
||||
if (rc) {
|
||||
devres_free(dr);
|
||||
return rc;
|
||||
|
|
@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq,
|
|||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(devm_request_irq);
|
||||
EXPORT_SYMBOL(devm_request_threaded_irq);
|
||||
|
||||
/**
|
||||
* devm_free_irq - free an interrupt
|
||||
|
|
|
|||
|
|
@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id)
|
|||
return IRQ_NONE;
|
||||
}
|
||||
|
||||
static void warn_no_thread(unsigned int irq, struct irqaction *action)
|
||||
{
|
||||
if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
|
||||
return;
|
||||
|
||||
printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
|
||||
"but no thread function available.", irq, action->name);
|
||||
}
|
||||
|
||||
DEFINE_TRACE(irq_handler_entry);
|
||||
DEFINE_TRACE(irq_handler_exit);
|
||||
|
||||
|
|
@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
|
|||
trace_irq_handler_entry(irq, action);
|
||||
ret = action->handler(irq, action->dev_id);
|
||||
trace_irq_handler_exit(irq, action, ret);
|
||||
if (ret == IRQ_HANDLED)
|
||||
|
||||
switch (ret) {
|
||||
case IRQ_WAKE_THREAD:
|
||||
/*
|
||||
* Set result to handled so the spurious check
|
||||
* does not trigger.
|
||||
*/
|
||||
ret = IRQ_HANDLED;
|
||||
|
||||
/*
|
||||
* Catch drivers which return WAKE_THREAD but
|
||||
* did not set up a thread function
|
||||
*/
|
||||
if (unlikely(!action->thread_fn)) {
|
||||
warn_no_thread(irq, action);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wake up the handler thread for this
|
||||
* action. In case the thread crashed and was
|
||||
* killed we just pretend that we handled the
|
||||
* interrupt. The hardirq handler above has
|
||||
* disabled the device interrupt, so no irq
|
||||
* storm is lurking.
|
||||
*/
|
||||
if (likely(!test_bit(IRQTF_DIED,
|
||||
&action->thread_flags))) {
|
||||
set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
|
||||
wake_up_process(action->thread);
|
||||
}
|
||||
|
||||
/* Fall through to add to randomness */
|
||||
case IRQ_HANDLED:
|
||||
status |= action->flags;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
retval |= ret;
|
||||
action = action->next;
|
||||
} while (action);
|
||||
|
|
|
|||
|
|
@ -8,16 +8,15 @@
|
|||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
|
||||
cpumask_var_t irq_default_affinity;
|
||||
|
||||
/**
|
||||
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
|
||||
* @irq: interrupt number to wait for
|
||||
|
|
@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq)
|
|||
|
||||
/* Oops, that failed? */
|
||||
} while (status & IRQ_INPROGRESS);
|
||||
|
||||
/*
|
||||
* We made sure that no hardirq handler is running. Now verify
|
||||
* that no threaded handlers are active.
|
||||
*/
|
||||
wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
|
||||
}
|
||||
EXPORT_SYMBOL(synchronize_irq);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
cpumask_var_t irq_default_affinity;
|
||||
|
||||
/**
|
||||
* irq_can_set_affinity - Check if the affinity of a given irq can be set
|
||||
* @irq: Interrupt to check
|
||||
|
|
@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
|
||||
{
|
||||
struct irqaction *action = desc->action;
|
||||
|
||||
while (action) {
|
||||
if (action->thread)
|
||||
set_cpus_allowed_ptr(action->thread, cpumask);
|
||||
action = action->next;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* irq_set_affinity - Set the irq affinity of a given irq
|
||||
* @irq: Interrupt to set affinity
|
||||
|
|
@ -100,6 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
|
|||
cpumask_copy(desc->affinity, cpumask);
|
||||
desc->chip->set_affinity(irq, cpumask);
|
||||
#endif
|
||||
irq_set_thread_affinity(desc, cpumask);
|
||||
desc->status |= IRQ_AFFINITY_SET;
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
return 0;
|
||||
|
|
@ -150,6 +171,8 @@ int irq_select_affinity_usr(unsigned int irq)
|
|||
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
ret = setup_affinity(irq, desc);
|
||||
if (!ret)
|
||||
irq_set_thread_affinity(desc, desc->affinity);
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
|
||||
return ret;
|
||||
|
|
@ -401,6 +424,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int irq_wait_for_interrupt(struct irqaction *action)
|
||||
{
|
||||
while (!kthread_should_stop()) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
if (test_and_clear_bit(IRQTF_RUNTHREAD,
|
||||
&action->thread_flags)) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
return 0;
|
||||
}
|
||||
schedule();
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Interrupt handler thread
|
||||
*/
|
||||
static int irq_thread(void *data)
|
||||
{
|
||||
struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
|
||||
struct irqaction *action = data;
|
||||
struct irq_desc *desc = irq_to_desc(action->irq);
|
||||
int wake;
|
||||
|
||||
sched_setscheduler(current, SCHED_FIFO, &param);
|
||||
current->irqaction = action;
|
||||
|
||||
while (!irq_wait_for_interrupt(action)) {
|
||||
|
||||
atomic_inc(&desc->threads_active);
|
||||
|
||||
spin_lock_irq(&desc->lock);
|
||||
if (unlikely(desc->status & IRQ_DISABLED)) {
|
||||
/*
|
||||
* CHECKME: We might need a dedicated
|
||||
* IRQ_THREAD_PENDING flag here, which
|
||||
* retriggers the thread in check_irq_resend()
|
||||
* but AFAICT IRQ_PENDING should be fine as it
|
||||
* retriggers the interrupt itself --- tglx
|
||||
*/
|
||||
desc->status |= IRQ_PENDING;
|
||||
spin_unlock_irq(&desc->lock);
|
||||
} else {
|
||||
spin_unlock_irq(&desc->lock);
|
||||
|
||||
action->thread_fn(action->irq, action->dev_id);
|
||||
}
|
||||
|
||||
wake = atomic_dec_and_test(&desc->threads_active);
|
||||
|
||||
if (wake && waitqueue_active(&desc->wait_for_threads))
|
||||
wake_up(&desc->wait_for_threads);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear irqaction. Otherwise exit_irq_thread() would make
|
||||
* fuzz about an active irq thread going into nirvana.
|
||||
*/
|
||||
current->irqaction = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from do_exit()
|
||||
*/
|
||||
void exit_irq_thread(void)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
if (!tsk->irqaction)
|
||||
return;
|
||||
|
||||
printk(KERN_ERR
|
||||
"exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
|
||||
tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
|
||||
|
||||
/*
|
||||
* Set the THREAD DIED flag to prevent further wakeups of the
|
||||
* soon to be gone threaded handler.
|
||||
*/
|
||||
set_bit(IRQTF_DIED, &tsk->irqaction->flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal function to register an irqaction - typically used to
|
||||
* allocate special interrupts that are part of the architecture.
|
||||
|
|
@ -436,6 +543,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
|
|||
rand_initialize_irq(irq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Threaded handler ?
|
||||
*/
|
||||
if (new->thread_fn) {
|
||||
struct task_struct *t;
|
||||
|
||||
t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
|
||||
new->name);
|
||||
if (IS_ERR(t))
|
||||
return PTR_ERR(t);
|
||||
/*
|
||||
* We keep the reference to the task struct even if
|
||||
* the thread dies to avoid that the interrupt code
|
||||
* references an already freed task_struct.
|
||||
*/
|
||||
get_task_struct(t);
|
||||
new->thread = t;
|
||||
wake_up_process(t);
|
||||
}
|
||||
|
||||
/*
|
||||
* The following block of code has to be executed atomically
|
||||
*/
|
||||
|
|
@ -473,15 +600,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
|
|||
if (!shared) {
|
||||
irq_chip_set_defaults(desc->chip);
|
||||
|
||||
init_waitqueue_head(&desc->wait_for_threads);
|
||||
|
||||
/* Setup the type (level, edge polarity) if configured: */
|
||||
if (new->flags & IRQF_TRIGGER_MASK) {
|
||||
ret = __irq_set_trigger(desc, irq,
|
||||
new->flags & IRQF_TRIGGER_MASK);
|
||||
|
||||
if (ret) {
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
if (ret)
|
||||
goto out_thread;
|
||||
} else
|
||||
compat_irq_chip_set_default_handler(desc);
|
||||
#if defined(CONFIG_IRQ_PER_CPU)
|
||||
|
|
@ -549,8 +676,19 @@ mismatch:
|
|||
dump_stack();
|
||||
}
|
||||
#endif
|
||||
ret = -EBUSY;
|
||||
|
||||
out_thread:
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
return -EBUSY;
|
||||
if (new->thread) {
|
||||
struct task_struct *t = new->thread;
|
||||
|
||||
new->thread = NULL;
|
||||
if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
|
||||
kthread_stop(t);
|
||||
put_task_struct(t);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -576,6 +714,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
|
|||
{
|
||||
struct irq_desc *desc = irq_to_desc(irq);
|
||||
struct irqaction *action, **action_ptr;
|
||||
struct task_struct *irqthread;
|
||||
unsigned long flags;
|
||||
|
||||
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
|
||||
|
|
@ -622,6 +761,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
|
|||
else
|
||||
desc->chip->disable(irq);
|
||||
}
|
||||
|
||||
irqthread = action->thread;
|
||||
action->thread = NULL;
|
||||
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
|
||||
unregister_handler_proc(irq, action);
|
||||
|
|
@ -629,6 +772,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
|
|||
/* Make sure it's not being used on another CPU: */
|
||||
synchronize_irq(irq);
|
||||
|
||||
if (irqthread) {
|
||||
if (!test_bit(IRQTF_DIED, &action->thread_flags))
|
||||
kthread_stop(irqthread);
|
||||
put_task_struct(irqthread);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_SHIRQ
|
||||
/*
|
||||
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
|
||||
|
|
@ -681,9 +830,12 @@ void free_irq(unsigned int irq, void *dev_id)
|
|||
EXPORT_SYMBOL(free_irq);
|
||||
|
||||
/**
|
||||
* request_irq - allocate an interrupt line
|
||||
* request_threaded_irq - allocate an interrupt line
|
||||
* @irq: Interrupt line to allocate
|
||||
* @handler: Function to be called when the IRQ occurs
|
||||
* @handler: Function to be called when the IRQ occurs.
|
||||
* Primary handler for threaded interrupts
|
||||
* @thread_fn: Function called from the irq handler thread
|
||||
* If NULL, no irq thread is created
|
||||
* @irqflags: Interrupt type flags
|
||||
* @devname: An ascii name for the claiming device
|
||||
* @dev_id: A cookie passed back to the handler function
|
||||
|
|
@ -695,6 +847,15 @@ EXPORT_SYMBOL(free_irq);
|
|||
* raises, you must take care both to initialise your hardware
|
||||
* and to set up the interrupt handler in the right order.
|
||||
*
|
||||
* If you want to set up a threaded irq handler for your device
|
||||
* then you need to supply @handler and @thread_fn. @handler is
|
||||
* still called in hard interrupt context and has to check
|
||||
* whether the interrupt originates from the device. If yes it
|
||||
* needs to disable the interrupt on the device and return
|
||||
* IRQ_WAKE_THREAD which will wake up the handler thread and run
|
||||
* @thread_fn. This split handler design is necessary to support
|
||||
* shared interrupts.
|
||||
*
|
||||
* Dev_id must be globally unique. Normally the address of the
|
||||
* device data structure is used as the cookie. Since the handler
|
||||
* receives this value it makes sense to use it.
|
||||
|
|
@ -710,8 +871,9 @@ EXPORT_SYMBOL(free_irq);
|
|||
* IRQF_TRIGGER_* Specify active edge(s) or level
|
||||
*
|
||||
*/
|
||||
int request_irq(unsigned int irq, irq_handler_t handler,
|
||||
unsigned long irqflags, const char *devname, void *dev_id)
|
||||
int request_threaded_irq(unsigned int irq, irq_handler_t handler,
|
||||
irq_handler_t thread_fn, unsigned long irqflags,
|
||||
const char *devname, void *dev_id)
|
||||
{
|
||||
struct irqaction *action;
|
||||
struct irq_desc *desc;
|
||||
|
|
@ -759,6 +921,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
|
|||
return -ENOMEM;
|
||||
|
||||
action->handler = handler;
|
||||
action->thread_fn = thread_fn;
|
||||
action->flags = irqflags;
|
||||
action->name = devname;
|
||||
action->dev_id = dev_id;
|
||||
|
|
@ -788,4 +951,4 @@ int request_irq(unsigned int irq, irq_handler_t handler,
|
|||
#endif
|
||||
return retval;
|
||||
}
|
||||
EXPORT_SYMBOL(request_irq);
|
||||
EXPORT_SYMBOL(request_threaded_irq);
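A hedged sketch of the split-handler pattern described in the request_threaded_irq() kerneldoc above; the device, registers and names are invented for illustration. The primary handler runs in hard-irq context, checks and masks the source, and returns IRQ_WAKE_THREAD; the sleeping work then runs in the per-action "irq/<nr>-<name>" thread:

/* Hypothetical driver fragment using the new threaded request API. */
#include <linux/interrupt.h>
#include <linux/io.h>

struct foo_dev {
	void __iomem *regs;
	int irq;
};

static irqreturn_t foo_quick_check(int irq, void *dev_id)
{
	struct foo_dev *foo = dev_id;

	/* Not ours? Let the other shared handlers have a look. */
	if (!(readl(foo->regs + 0x04) & 0x1))
		return IRQ_NONE;

	/* Mask the source on the device, then defer to the thread. */
	writel(0x0, foo->regs + 0x08);
	return IRQ_WAKE_THREAD;
}

static irqreturn_t foo_thread_fn(int irq, void *dev_id)
{
	struct foo_dev *foo = dev_id;

	/* May sleep here: talk to slow buses, take mutexes, etc. */
	writel(0x1, foo->regs + 0x08);	/* unmask when done */
	return IRQ_HANDLED;
}

static int foo_setup_irq(struct foo_dev *foo)
{
	return request_threaded_irq(foo->irq, foo_quick_check,
				    foo_thread_fn, IRQF_SHARED,
				    "foo", foo);
}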
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
|
|||
static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
|
||||
{
|
||||
free_kstat_irqs(old_desc, desc);
|
||||
free_desc_masks(old_desc, desc);
|
||||
arch_free_chip_data(old_desc, desc);
|
||||
}
|
||||
|
||||
|
|
|
|||
285
kernel/kprobes.c
|
|
@ -68,7 +68,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
|
|||
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
|
||||
|
||||
/* NOTE: change this value only with kprobe_mutex held */
|
||||
static bool kprobe_enabled;
|
||||
static bool kprobes_all_disarmed;
|
||||
|
||||
static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
|
||||
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
|
||||
|
|
@ -328,7 +328,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
|||
struct kprobe *kp;
|
||||
|
||||
list_for_each_entry_rcu(kp, &p->list, list) {
|
||||
if (kp->pre_handler && !kprobe_gone(kp)) {
|
||||
if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
|
||||
set_kprobe_instance(kp);
|
||||
if (kp->pre_handler(kp, regs))
|
||||
return 1;
|
||||
|
|
@ -344,7 +344,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
|
|||
struct kprobe *kp;
|
||||
|
||||
list_for_each_entry_rcu(kp, &p->list, list) {
|
||||
if (kp->post_handler && !kprobe_gone(kp)) {
|
||||
if (kp->post_handler && likely(!kprobe_disabled(kp))) {
|
||||
set_kprobe_instance(kp);
|
||||
kp->post_handler(kp, regs, flags);
|
||||
reset_kprobe_instance();
|
||||
|
|
@ -518,20 +518,28 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
|
|||
}
|
||||
|
||||
/*
|
||||
* Add the new probe to old_p->list. Fail if this is the
|
||||
* Add the new probe to ap->list. Fail if this is the
|
||||
* second jprobe at the address - two jprobes can't coexist
|
||||
*/
|
||||
static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
|
||||
static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
|
||||
{
|
||||
BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
|
||||
if (p->break_handler) {
|
||||
if (old_p->break_handler)
|
||||
if (ap->break_handler)
|
||||
return -EEXIST;
|
||||
list_add_tail_rcu(&p->list, &old_p->list);
|
||||
old_p->break_handler = aggr_break_handler;
|
||||
list_add_tail_rcu(&p->list, &ap->list);
|
||||
ap->break_handler = aggr_break_handler;
|
||||
} else
|
||||
list_add_rcu(&p->list, &old_p->list);
|
||||
if (p->post_handler && !old_p->post_handler)
|
||||
old_p->post_handler = aggr_post_handler;
|
||||
list_add_rcu(&p->list, &ap->list);
|
||||
if (p->post_handler && !ap->post_handler)
|
||||
ap->post_handler = aggr_post_handler;
|
||||
|
||||
if (kprobe_disabled(ap) && !kprobe_disabled(p)) {
|
||||
ap->flags &= ~KPROBE_FLAG_DISABLED;
|
||||
if (!kprobes_all_disarmed)
|
||||
/* Arm the breakpoint again. */
|
||||
arch_arm_kprobe(ap);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -544,6 +552,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
|
|||
copy_kprobe(p, ap);
|
||||
flush_insn_slot(ap);
|
||||
ap->addr = p->addr;
|
||||
ap->flags = p->flags;
|
||||
ap->pre_handler = aggr_pre_handler;
|
||||
ap->fault_handler = aggr_fault_handler;
|
||||
/* We don't care the kprobe which has gone. */
|
||||
|
|
@ -566,44 +575,59 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
|
|||
struct kprobe *p)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kprobe *ap;
|
||||
struct kprobe *ap = old_p;
|
||||
|
||||
if (kprobe_gone(old_p)) {
|
||||
if (old_p->pre_handler != aggr_pre_handler) {
|
||||
/* If old_p is not an aggr_probe, create new aggr_kprobe. */
|
||||
ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
|
||||
if (!ap)
|
||||
return -ENOMEM;
|
||||
add_aggr_kprobe(ap, old_p);
|
||||
}
|
||||
|
||||
if (kprobe_gone(ap)) {
|
||||
/*
|
||||
* Attempting to insert new probe at the same location that
|
||||
* had a probe in the module vaddr area which was already
|
||||
* freed. So, the instruction slot has already been
|
||||
* released. We need a new slot for the new probe.
|
||||
*/
|
||||
ret = arch_prepare_kprobe(old_p);
|
||||
ret = arch_prepare_kprobe(ap);
|
||||
if (ret)
|
||||
/*
|
||||
* Even if fail to allocate new slot, don't need to
|
||||
* free aggr_probe. It will be used next time, or
|
||||
* freed by unregister_kprobe.
|
||||
*/
|
||||
return ret;
|
||||
}
|
||||
if (old_p->pre_handler == aggr_pre_handler) {
|
||||
copy_kprobe(old_p, p);
|
||||
ret = add_new_kprobe(old_p, p);
|
||||
ap = old_p;
|
||||
} else {
|
||||
ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
|
||||
if (!ap) {
|
||||
if (kprobe_gone(old_p))
|
||||
arch_remove_kprobe(old_p);
|
||||
return -ENOMEM;
|
||||
}
|
||||
add_aggr_kprobe(ap, old_p);
|
||||
copy_kprobe(ap, p);
|
||||
ret = add_new_kprobe(ap, p);
|
||||
}
|
||||
if (kprobe_gone(old_p)) {
|
||||
|
||||
/*
|
||||
* If the old_p has gone, its breakpoint has been disarmed.
|
||||
* We have to arm it again after preparing real kprobes.
|
||||
* Clear gone flag to prevent allocating new slot again, and
|
||||
* set disabled flag because it is not armed yet.
|
||||
*/
|
||||
ap->flags &= ~KPROBE_FLAG_GONE;
|
||||
if (kprobe_enabled)
|
||||
arch_arm_kprobe(ap);
|
||||
ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
|
||||
| KPROBE_FLAG_DISABLED;
|
||||
}
|
||||
return ret;
|
||||
|
||||
copy_kprobe(ap, p);
|
||||
return add_new_kprobe(ap, p);
|
||||
}
|
||||
|
||||
/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
|
||||
static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
list_for_each_entry_rcu(kp, &p->list, list) {
|
||||
if (!kprobe_disabled(kp))
|
||||
/*
|
||||
* There is an active probe on the list.
|
||||
* We can't disable aggr_kprobe.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
p->flags |= KPROBE_FLAG_DISABLED;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int __kprobes in_kprobes_functions(unsigned long addr)
|
||||
|
|
@ -664,7 +688,9 @@ int __kprobes register_kprobe(struct kprobe *p)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
p->flags = 0;
|
||||
/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
|
||||
p->flags &= KPROBE_FLAG_DISABLED;
|
||||
|
||||
/*
|
||||
* Check if we are probing a module.
|
||||
*/
|
||||
|
|
@ -709,7 +735,7 @@ int __kprobes register_kprobe(struct kprobe *p)
|
|||
hlist_add_head_rcu(&p->hlist,
|
||||
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
|
||||
|
||||
if (kprobe_enabled)
|
||||
if (!kprobes_all_disarmed && !kprobe_disabled(p))
|
||||
arch_arm_kprobe(p);
|
||||
|
||||
out_unlock_text:
|
||||
|
|
@ -722,6 +748,27 @@ out:
|
|||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_kprobe);
|
||||
|
||||
/* Check passed kprobe is valid and return kprobe in kprobe_table. */
|
||||
static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *old_p, *list_p;
|
||||
|
||||
old_p = get_kprobe(p->addr);
|
||||
if (unlikely(!old_p))
|
||||
return NULL;
|
||||
|
||||
if (p != old_p) {
|
||||
list_for_each_entry_rcu(list_p, &old_p->list, list)
|
||||
if (list_p == p)
|
||||
/* kprobe p is a valid probe */
|
||||
goto valid;
|
||||
return NULL;
|
||||
}
|
||||
valid:
|
||||
return old_p;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unregister a kprobe without a scheduler synchronization.
|
||||
|
|
@ -730,18 +777,10 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
|
|||
{
|
||||
struct kprobe *old_p, *list_p;
|
||||
|
||||
old_p = get_kprobe(p->addr);
|
||||
if (unlikely(!old_p))
|
||||
old_p = __get_valid_kprobe(p);
|
||||
if (old_p == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (p != old_p) {
|
||||
list_for_each_entry_rcu(list_p, &old_p->list, list)
|
||||
if (list_p == p)
|
||||
/* kprobe p is a valid probe */
|
||||
goto valid_p;
|
||||
return -EINVAL;
|
||||
}
|
||||
valid_p:
|
||||
if (old_p == p ||
|
||||
(old_p->pre_handler == aggr_pre_handler &&
|
||||
list_is_singular(&old_p->list))) {
|
||||
|
|
@ -750,7 +789,7 @@ valid_p:
|
|||
* enabled and not gone - otherwise, the breakpoint would
|
||||
* already have been removed. We save on flushing icache.
|
||||
*/
|
||||
if (kprobe_enabled && !kprobe_gone(old_p)) {
|
||||
if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) {
|
||||
mutex_lock(&text_mutex);
|
||||
arch_disarm_kprobe(p);
|
||||
mutex_unlock(&text_mutex);
|
||||
|
|
@ -768,6 +807,11 @@ valid_p:
|
|||
}
|
||||
noclean:
|
||||
list_del_rcu(&p->list);
|
||||
if (!kprobe_disabled(old_p)) {
|
||||
try_to_disable_aggr_kprobe(old_p);
|
||||
if (!kprobes_all_disarmed && kprobe_disabled(old_p))
|
||||
arch_disarm_kprobe(old_p);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -803,11 +847,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num)
|
|||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_kprobes);
|
||||
|
||||
void __kprobes unregister_kprobe(struct kprobe *p)
|
||||
{
|
||||
unregister_kprobes(&p, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kprobe);
|
||||
|
||||
void __kprobes unregister_kprobes(struct kprobe **kps, int num)
|
||||
{
|
||||
|
|
@ -826,6 +872,7 @@ void __kprobes unregister_kprobes(struct kprobe **kps, int num)
|
|||
if (kps[i]->addr)
|
||||
__unregister_kprobe_bottom(kps[i]);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kprobes);
|
||||
|
||||
static struct notifier_block kprobe_exceptions_nb = {
|
||||
.notifier_call = kprobe_exceptions_notify,
|
||||
|
|
@ -865,16 +912,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
|
|||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_jprobes);
|
||||
|
||||
int __kprobes register_jprobe(struct jprobe *jp)
|
||||
{
|
||||
return register_jprobes(&jp, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_jprobe);
|
||||
|
||||
void __kprobes unregister_jprobe(struct jprobe *jp)
|
||||
{
|
||||
unregister_jprobes(&jp, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_jprobe);
|
||||
|
||||
void __kprobes unregister_jprobes(struct jprobe **jps, int num)
|
||||
{
|
||||
|
|
@ -894,6 +944,7 @@ void __kprobes unregister_jprobes(struct jprobe **jps, int num)
|
|||
__unregister_kprobe_bottom(&jps[i]->kp);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_jprobes);
|
||||
|
||||
#ifdef CONFIG_KRETPROBES
|
||||
/*
|
||||
|
|
@ -987,6 +1038,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
|
|||
free_rp_inst(rp);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_kretprobe);
|
||||
|
||||
int __kprobes register_kretprobes(struct kretprobe **rps, int num)
|
||||
{
|
||||
|
|
@ -1004,11 +1056,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num)
|
|||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_kretprobes);
|
||||
|
||||
void __kprobes unregister_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
unregister_kretprobes(&rp, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobe);
|
||||
|
||||
void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
|
||||
{
|
||||
|
|
@ -1030,24 +1084,30 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
|
|||
}
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobes);
|
||||
|
||||
#else /* CONFIG_KRETPROBES */
|
||||
int __kprobes register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_kretprobe);
|
||||
|
||||
int __kprobes register_kretprobes(struct kretprobe **rps, int num)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_kretprobes);
|
||||
|
||||
void __kprobes unregister_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobe);
|
||||
|
||||
void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
|
||||
{
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobes);
|
||||
|
||||
static int __kprobes pre_handler_kretprobe(struct kprobe *p,
|
||||
struct pt_regs *regs)
|
||||
|
|
@ -1061,6 +1121,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
|
|||
static void __kprobes kill_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
p->flags |= KPROBE_FLAG_GONE;
|
||||
if (p->pre_handler == aggr_pre_handler) {
|
||||
/*
|
||||
|
|
@ -1173,8 +1234,8 @@ static int __init init_kprobes(void)
|
|||
}
|
||||
}
|
||||
|
||||
/* By default, kprobes are enabled */
|
||||
kprobe_enabled = true;
|
||||
/* By default, kprobes are armed */
|
||||
kprobes_all_disarmed = false;
|
||||
|
||||
err = arch_init_kprobes();
|
||||
if (!err)
|
||||
|
|
@ -1202,12 +1263,18 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
|
|||
else
|
||||
kprobe_type = "k";
|
||||
if (sym)
|
||||
seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type,
|
||||
sym, offset, (modname ? modname : " "),
|
||||
(kprobe_gone(p) ? "[GONE]" : ""));
|
||||
seq_printf(pi, "%p %s %s+0x%x %s %s%s\n",
|
||||
p->addr, kprobe_type, sym, offset,
|
||||
(modname ? modname : " "),
|
||||
(kprobe_gone(p) ? "[GONE]" : ""),
|
||||
((kprobe_disabled(p) && !kprobe_gone(p)) ?
|
||||
"[DISABLED]" : ""));
|
||||
else
|
||||
seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr,
|
||||
(kprobe_gone(p) ? "[GONE]" : ""));
|
||||
seq_printf(pi, "%p %s %p %s%s\n",
|
||||
p->addr, kprobe_type, p->addr,
|
||||
(kprobe_gone(p) ? "[GONE]" : ""),
|
||||
((kprobe_disabled(p) && !kprobe_gone(p)) ?
|
||||
"[DISABLED]" : ""));
|
||||
}
|
||||
|
||||
static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
|
||||
|
|
@ -1272,7 +1339,72 @@ static struct file_operations debugfs_kprobes_operations = {
|
|||
.release = seq_release,
|
||||
};
|
||||
|
||||
static void __kprobes enable_all_kprobes(void)
|
||||
/* Disable one kprobe */
|
||||
int __kprobes disable_kprobe(struct kprobe *kp)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kprobe *p;
|
||||
|
||||
mutex_lock(&kprobe_mutex);
|
||||
|
||||
/* Check whether specified probe is valid. */
|
||||
p = __get_valid_kprobe(kp);
|
||||
if (unlikely(p == NULL)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* If the probe is already disabled (or gone), just return */
|
||||
if (kprobe_disabled(kp))
|
||||
goto out;
|
||||
|
||||
kp->flags |= KPROBE_FLAG_DISABLED;
|
||||
if (p != kp)
|
||||
/* When kp != p, p is always enabled. */
|
||||
try_to_disable_aggr_kprobe(p);
|
||||
|
||||
if (!kprobes_all_disarmed && kprobe_disabled(p))
|
||||
arch_disarm_kprobe(p);
|
||||
out:
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(disable_kprobe);
|
||||
|
||||
/* Enable one kprobe */
|
||||
int __kprobes enable_kprobe(struct kprobe *kp)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kprobe *p;
|
||||
|
||||
mutex_lock(&kprobe_mutex);
|
||||
|
||||
/* Check whether specified probe is valid. */
|
||||
p = __get_valid_kprobe(kp);
|
||||
if (unlikely(p == NULL)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (kprobe_gone(kp)) {
|
||||
/* This kprobe has gone, we couldn't enable it. */
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!kprobes_all_disarmed && kprobe_disabled(p))
|
||||
arch_arm_kprobe(p);
|
||||
|
||||
p->flags &= ~KPROBE_FLAG_DISABLED;
|
||||
if (p != kp)
|
||||
kp->flags &= ~KPROBE_FLAG_DISABLED;
|
||||
out:
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(enable_kprobe);
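A hedged sketch of a module using the new per-probe switches together with the KPROBE_FLAG_DISABLED registration path; the probed symbol and handler body are placeholders:

/* Hypothetical module using disable_kprobe()/enable_kprobe(). */
#include <linux/module.h>
#include <linux/kprobes.h>

static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("do_fork entered\n");
	return 0;
}

static struct kprobe kp = {
	.symbol_name	= "do_fork",
	.pre_handler	= handler_pre,
	/* Registered but not armed until enable_kprobe() is called. */
	.flags		= KPROBE_FLAG_DISABLED,
};

static int __init kp_init(void)
{
	int ret = register_kprobe(&kp);

	if (ret < 0)
		return ret;

	/* Arm it now; could equally be done later from a debugfs hook. */
	return enable_kprobe(&kp);
}

static void __exit kp_exit(void)
{
	disable_kprobe(&kp);	/* disarm without unregistering */
	unregister_kprobe(&kp);
}

module_init(kp_init);
module_exit(kp_exit);
MODULE_LICENSE("GPL");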
|
||||
|
||||
static void __kprobes arm_all_kprobes(void)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct hlist_node *node;
|
||||
|
|
@ -1281,20 +1413,20 @@ static void __kprobes enable_all_kprobes(void)
|
|||
|
||||
mutex_lock(&kprobe_mutex);
|
||||
|
||||
/* If kprobes are already enabled, just return */
|
||||
if (kprobe_enabled)
|
||||
/* If kprobes are armed, just return */
|
||||
if (!kprobes_all_disarmed)
|
||||
goto already_enabled;
|
||||
|
||||
mutex_lock(&text_mutex);
|
||||
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
|
||||
head = &kprobe_table[i];
|
||||
hlist_for_each_entry_rcu(p, node, head, hlist)
|
||||
if (!kprobe_gone(p))
|
||||
if (!kprobe_disabled(p))
|
||||
arch_arm_kprobe(p);
|
||||
}
|
||||
mutex_unlock(&text_mutex);
|
||||
|
||||
kprobe_enabled = true;
|
||||
kprobes_all_disarmed = false;
|
||||
printk(KERN_INFO "Kprobes globally enabled\n");
|
||||
|
||||
already_enabled:
|
||||
|
|
@ -1302,7 +1434,7 @@ already_enabled:
|
|||
return;
|
||||
}
|
||||
|
||||
static void __kprobes disable_all_kprobes(void)
|
||||
static void __kprobes disarm_all_kprobes(void)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct hlist_node *node;
|
||||
|
|
@ -1311,17 +1443,17 @@ static void __kprobes disable_all_kprobes(void)
|
|||
|
||||
mutex_lock(&kprobe_mutex);
|
||||
|
||||
/* If kprobes are already disabled, just return */
|
||||
if (!kprobe_enabled)
|
||||
/* If kprobes are already disarmed, just return */
|
||||
if (kprobes_all_disarmed)
|
||||
goto already_disabled;
|
||||
|
||||
kprobe_enabled = false;
|
||||
kprobes_all_disarmed = true;
|
||||
printk(KERN_INFO "Kprobes globally disabled\n");
|
||||
mutex_lock(&text_mutex);
|
||||
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
|
||||
head = &kprobe_table[i];
|
||||
hlist_for_each_entry_rcu(p, node, head, hlist) {
|
||||
if (!arch_trampoline_kprobe(p) && !kprobe_gone(p))
|
||||
if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
|
||||
arch_disarm_kprobe(p);
|
||||
}
|
||||
}
|
||||
|
|
@ -1347,7 +1479,7 @@ static ssize_t read_enabled_file_bool(struct file *file,
|
|||
{
|
||||
char buf[3];
|
||||
|
||||
if (kprobe_enabled)
|
||||
if (!kprobes_all_disarmed)
|
||||
buf[0] = '1';
|
||||
else
|
||||
buf[0] = '0';
|
||||
|
|
@ -1370,12 +1502,12 @@ static ssize_t write_enabled_file_bool(struct file *file,
|
|||
case 'y':
|
||||
case 'Y':
|
||||
case '1':
|
||||
enable_all_kprobes();
|
||||
arm_all_kprobes();
|
||||
break;
|
||||
case 'n':
|
||||
case 'N':
|
||||
case '0':
|
||||
disable_all_kprobes();
|
||||
disarm_all_kprobes();
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -1418,16 +1550,5 @@ late_initcall(debugfs_kprobe_init);
|
|||
|
||||
module_init(init_kprobes);
|
||||
|
||||
EXPORT_SYMBOL_GPL(register_kprobe);
|
||||
EXPORT_SYMBOL_GPL(unregister_kprobe);
|
||||
EXPORT_SYMBOL_GPL(register_kprobes);
|
||||
EXPORT_SYMBOL_GPL(unregister_kprobes);
|
||||
EXPORT_SYMBOL_GPL(register_jprobe);
|
||||
EXPORT_SYMBOL_GPL(unregister_jprobe);
|
||||
EXPORT_SYMBOL_GPL(register_jprobes);
|
||||
EXPORT_SYMBOL_GPL(unregister_jprobes);
|
||||
/* defined in arch/.../kernel/kprobes.c */
|
||||
EXPORT_SYMBOL_GPL(jprobe_return);
|
||||
EXPORT_SYMBOL_GPL(register_kretprobe);
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobe);
|
||||
EXPORT_SYMBOL_GPL(register_kretprobes);
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobes);
|
||||
|
|
|
|||
|
|
@ -76,6 +76,7 @@ static int kthread(void *_create)
|
|||
|
||||
/* OK, tell user we're spawned, wait for stop or wakeup */
|
||||
__set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
create->result = current;
|
||||
complete(&create->started);
|
||||
schedule();
|
||||
|
||||
|
|
@ -96,22 +97,10 @@ static void create_kthread(struct kthread_create_info *create)
|
|||
|
||||
/* We want our own signal handler (we take no signals by default). */
|
||||
pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
|
||||
if (pid < 0) {
|
||||
if (pid < 0)
|
||||
create->result = ERR_PTR(pid);
|
||||
} else {
|
||||
struct sched_param param = { .sched_priority = 0 };
|
||||
else
|
||||
wait_for_completion(&create->started);
|
||||
read_lock(&tasklist_lock);
|
||||
create->result = find_task_by_pid_ns(pid, &init_pid_ns);
|
||||
read_unlock(&tasklist_lock);
|
||||
/*
|
||||
* root may have changed our (kthreadd's) priority or CPU mask.
|
||||
* The kernel thread should not inherit these properties.
|
||||
*/
|
||||
sched_setscheduler(create->result, SCHED_NORMAL, &param);
|
||||
set_user_nice(create->result, KTHREAD_NICE_LEVEL);
|
||||
set_cpus_allowed_ptr(create->result, cpu_all_mask);
|
||||
}
|
||||
complete(&create->done);
|
||||
}
|
||||
|
||||
|
|
@ -154,11 +143,20 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
|
|||
wait_for_completion(&create.done);
|
||||
|
||||
if (!IS_ERR(create.result)) {
|
||||
struct sched_param param = { .sched_priority = 0 };
|
||||
va_list args;
|
||||
|
||||
va_start(args, namefmt);
|
||||
vsnprintf(create.result->comm, sizeof(create.result->comm),
|
||||
namefmt, args);
|
||||
va_end(args);
|
||||
/*
|
||||
* root may have changed our (kthreadd's) priority or CPU mask.
|
||||
* The kernel thread should not inherit these properties.
|
||||
*/
|
||||
sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param);
|
||||
set_user_nice(create.result, KTHREAD_NICE_LEVEL);
|
||||
set_cpus_allowed_ptr(create.result, cpu_all_mask);
|
||||
}
|
||||
return create.result;
|
||||
}
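For context, a minimal sketch of a kthread_create()/kthread_run() caller; after this change the returned thread always starts with SCHED_NORMAL, KTHREAD_NICE_LEVEL and an unrestricted CPU mask, no matter how root has tweaked kthreadd. The names below are illustrative:

/* Hypothetical caller: spawn and later stop a worker thread. */
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/err.h>

static struct task_struct *worker;

static int worker_fn(void *data)
{
	while (!kthread_should_stop()) {
		/* do periodic work */
		msleep(1000);
	}
	return 0;
}

static int start_worker(void)
{
	/* Inherits the default policy/affinity set up by kthread_create(). */
	worker = kthread_run(worker_fn, NULL, "foo-worker/%d", 0);
	return IS_ERR(worker) ? PTR_ERR(worker) : 0;
}

static void stop_worker(void)
{
	kthread_stop(worker);
}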
|
||||
|
|
|
|||
|
|
@ -1952,9 +1952,6 @@ static noinline struct module *load_module(void __user *umod,
|
|||
if (strstarts(secstrings+sechdrs[i].sh_name, ".exit"))
|
||||
sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
|
||||
#endif
|
||||
/* Don't keep __versions around; it's just for loading. */
|
||||
if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0)
|
||||
sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
|
||||
}
|
||||
|
||||
modindex = find_sec(hdr, sechdrs, secstrings,
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ void update_rlimit_cpu(unsigned long rlim_new)
|
|||
|
||||
cputime = secs_to_cputime(rlim_new);
|
||||
if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
|
||||
cputime_lt(current->signal->it_prof_expires, cputime)) {
|
||||
cputime_gt(current->signal->it_prof_expires, cputime)) {
|
||||
spin_lock_irq(&current->sighand->siglock);
|
||||
set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
|
||||
spin_unlock_irq(&current->sighand->siglock);
|
||||
|
|
@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
|
|||
cpu->cpu = virt_ticks(p);
|
||||
break;
|
||||
case CPUCLOCK_SCHED:
|
||||
cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
|
||||
cpu->sched = task_sched_runtime(p);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
|
|
@ -305,18 +305,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
|
|||
{
|
||||
struct task_cputime cputime;
|
||||
|
||||
thread_group_cputime(p, &cputime);
|
||||
switch (CPUCLOCK_WHICH(which_clock)) {
|
||||
default:
|
||||
return -EINVAL;
|
||||
case CPUCLOCK_PROF:
|
||||
thread_group_cputime(p, &cputime);
|
||||
cpu->cpu = cputime_add(cputime.utime, cputime.stime);
|
||||
break;
|
||||
case CPUCLOCK_VIRT:
|
||||
thread_group_cputime(p, &cputime);
|
||||
cpu->cpu = cputime.utime;
|
||||
break;
|
||||
case CPUCLOCK_SCHED:
|
||||
cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
|
||||
cpu->sched = thread_group_sched_runtime(p);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -21,9 +21,7 @@
|
|||
#include <linux/audit.h>
|
||||
#include <linux/pid_namespace.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
|
||||
/*
|
||||
|
|
@ -48,7 +46,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
|
|||
list_add(&child->ptrace_entry, &new_parent->ptraced);
|
||||
child->parent = new_parent;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Turn a tracing stop into a normal stop now, since with no tracer there
|
||||
* would be no way to wake it up with SIGCONT or SIGKILL. If there was a
|
||||
|
|
@ -173,7 +171,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
|
|||
task_lock(task);
|
||||
err = __ptrace_may_access(task, mode);
|
||||
task_unlock(task);
|
||||
return (!err ? true : false);
|
||||
return !err;
|
||||
}
|
||||
|
||||
int ptrace_attach(struct task_struct *task)
|
||||
|
|
@ -358,7 +356,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
|
|||
copied += retval;
|
||||
src += retval;
|
||||
dst += retval;
|
||||
len -= retval;
|
||||
len -= retval;
|
||||
}
|
||||
return copied;
|
||||
}
|
||||
|
|
@ -383,7 +381,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
|
|||
copied += retval;
|
||||
src += retval;
|
||||
dst += retval;
|
||||
len -= retval;
|
||||
len -= retval;
|
||||
}
|
||||
return copied;
|
||||
}
|
||||
|
|
@ -496,9 +494,9 @@ static int ptrace_resume(struct task_struct *child, long request, long data)
|
|||
if (unlikely(!arch_has_single_step()))
|
||||
return -EIO;
|
||||
user_enable_single_step(child);
|
||||
}
|
||||
else
|
||||
} else {
|
||||
user_disable_single_step(child);
|
||||
}
|
||||
|
||||
child->exit_code = data;
|
||||
wake_up_process(child);
|
||||
|
|
|
|||
160
kernel/sched.c
|
|
@ -1418,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
|||
struct rq_iterator *iterator);
|
||||
#endif
|
||||
|
||||
/* Time spent by the tasks of the cpu accounting group executing in ... */
|
||||
enum cpuacct_stat_index {
|
||||
CPUACCT_STAT_USER, /* ... user mode */
|
||||
CPUACCT_STAT_SYSTEM, /* ... kernel mode */
|
||||
|
||||
CPUACCT_STAT_NSTATS,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CGROUP_CPUACCT
|
||||
static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
|
||||
static void cpuacct_update_stats(struct task_struct *tsk,
|
||||
enum cpuacct_stat_index idx, cputime_t val);
|
||||
#else
|
||||
static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
|
||||
static inline void cpuacct_update_stats(struct task_struct *tsk,
|
||||
enum cpuacct_stat_index idx, cputime_t val) {}
|
||||
#endif
|
||||
|
||||
static inline void inc_cpu_load(struct rq *rq, unsigned long load)
|
||||
|
|
@ -4511,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
|
|||
EXPORT_PER_CPU_SYMBOL(kstat);
|
||||
|
||||
/*
|
||||
* Return any ns on the sched_clock that have not yet been banked in
|
||||
* Return any ns on the sched_clock that have not yet been accounted in
|
||||
* @p in case that task is currently running.
|
||||
*
|
||||
* Called with task_rq_lock() held on @rq.
|
||||
*/
|
||||
static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
|
||||
{
|
||||
u64 ns = 0;
|
||||
|
||||
if (task_current(rq, p)) {
|
||||
update_rq_clock(rq);
|
||||
ns = rq->clock - p->se.exec_start;
|
||||
if ((s64)ns < 0)
|
||||
ns = 0;
|
||||
}
|
||||
|
||||
return ns;
|
||||
}
|
||||
|
||||
unsigned long long task_delta_exec(struct task_struct *p)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
|
@ -4521,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p)
|
|||
u64 ns = 0;
|
||||
|
||||
rq = task_rq_lock(p, &flags);
|
||||
ns = do_task_delta_exec(p, rq);
|
||||
task_rq_unlock(rq, &flags);
|
||||
|
||||
if (task_current(rq, p)) {
|
||||
u64 delta_exec;
|
||||
return ns;
|
||||
}
|
||||
|
||||
update_rq_clock(rq);
|
||||
delta_exec = rq->clock - p->se.exec_start;
|
||||
if ((s64)delta_exec > 0)
|
||||
ns = delta_exec;
|
||||
}
|
||||
/*
|
||||
* Return accounted runtime for the task.
|
||||
* In case the task is currently running, return the runtime plus current's
|
||||
* pending runtime that has not been accounted yet.
|
||||
*/
|
||||
unsigned long long task_sched_runtime(struct task_struct *p)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rq *rq;
|
||||
u64 ns = 0;
|
||||
|
||||
rq = task_rq_lock(p, &flags);
|
||||
ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
|
||||
task_rq_unlock(rq, &flags);
|
||||
|
||||
return ns;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return sum_exec_runtime for the thread group.
|
||||
* In case the task is currently running, return the sum plus current's
|
||||
* pending runtime that has not been accounted yet.
|
||||
*
|
||||
* Note that the thread group might have other running tasks as well,
|
||||
* so the return value does not include other pending runtime that other
|
||||
* running tasks might have.
|
||||
*/
|
||||
unsigned long long thread_group_sched_runtime(struct task_struct *p)
|
||||
{
|
||||
struct task_cputime totals;
|
||||
unsigned long flags;
|
||||
struct rq *rq;
|
||||
u64 ns;
|
||||
|
||||
rq = task_rq_lock(p, &flags);
|
||||
thread_group_cputime(p, &totals);
|
||||
    ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
    task_rq_unlock(rq, &flags);

    return ns;

@@ -4559,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
        cpustat->nice = cputime64_add(cpustat->nice, tmp);
    else
        cpustat->user = cputime64_add(cpustat->user, tmp);

    cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);

    /* Account for user time used */
    acct_update_integrals(p);
}

@@ -4620,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
    else
        cpustat->system = cputime64_add(cpustat->system, tmp);

    cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);

    /* Account for system time used */
    acct_update_integrals(p);
}

@@ -7302,7 +7367,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
        cpumask_or(groupmask, groupmask, sched_group_cpus(group));

        cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
        printk(KERN_CONT " %s", str);
        printk(KERN_CONT " %s (__cpu_power = %d)", str,
                    group->__cpu_power);

        group = group->next;
    } while (group != sd->groups);

@@ -9925,6 +9991,7 @@ struct cpuacct {
    struct cgroup_subsys_state css;
    /* cpuusage holds pointer to a u64-type object on every cpu */
    u64 *cpuusage;
    struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
    struct cpuacct *parent;
};

@@ -9949,20 +10016,32 @@ static struct cgroup_subsys_state *cpuacct_create(
    struct cgroup_subsys *ss, struct cgroup *cgrp)
{
    struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
    int i;

    if (!ca)
        return ERR_PTR(-ENOMEM);
        goto out;

    ca->cpuusage = alloc_percpu(u64);
    if (!ca->cpuusage) {
        kfree(ca);
        return ERR_PTR(-ENOMEM);
    }
    if (!ca->cpuusage)
        goto out_free_ca;

    for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
        if (percpu_counter_init(&ca->cpustat[i], 0))
            goto out_free_counters;

    if (cgrp->parent)
        ca->parent = cgroup_ca(cgrp->parent);

    return &ca->css;

out_free_counters:
    while (--i >= 0)
        percpu_counter_destroy(&ca->cpustat[i]);
    free_percpu(ca->cpuusage);
out_free_ca:
    kfree(ca);
out:
    return ERR_PTR(-ENOMEM);
}

/* destroy an existing cpu accounting group */

@@ -9970,7 +10049,10 @@ static void
cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
    struct cpuacct *ca = cgroup_ca(cgrp);
    int i;

    for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
        percpu_counter_destroy(&ca->cpustat[i]);
    free_percpu(ca->cpuusage);
    kfree(ca);
}

@@ -10057,6 +10139,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
    return 0;
}

static const char *cpuacct_stat_desc[] = {
    [CPUACCT_STAT_USER] = "user",
    [CPUACCT_STAT_SYSTEM] = "system",
};

static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
        struct cgroup_map_cb *cb)
{
    struct cpuacct *ca = cgroup_ca(cgrp);
    int i;

    for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
        s64 val = percpu_counter_read(&ca->cpustat[i]);
        val = cputime64_to_clock_t(val);
        cb->fill(cb, cpuacct_stat_desc[i], val);
    }
    return 0;
}

static struct cftype files[] = {
    {
        .name = "usage",

@@ -10067,7 +10168,10 @@ static struct cftype files[] = {
        .name = "usage_percpu",
        .read_seq_string = cpuacct_percpu_seq_read,
    },
    {
        .name = "stat",
        .read_map = cpuacct_stats_show,
    },
};

static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)

@@ -10089,12 +10193,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
        return;

    cpu = task_cpu(tsk);

    rcu_read_lock();

    ca = task_ca(tsk);

    for (; ca; ca = ca->parent) {
        u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
        *cpuusage += cputime;
    }

    rcu_read_unlock();
}

/*
 * Charge the system/user time to the task's accounting group.
 */
static void cpuacct_update_stats(struct task_struct *tsk,
        enum cpuacct_stat_index idx, cputime_t val)
{
    struct cpuacct *ca;

    if (unlikely(!cpuacct_subsys.active))
        return;

    rcu_read_lock();
    ca = task_ca(tsk);

    do {
        percpu_counter_add(&ca->cpustat[idx], val);
        ca = ca->parent;
    } while (ca);
    rcu_read_unlock();
}

struct cgroup_subsys cpuacct_subsys = {
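The new cpuacct "stat" control file splits a group's accumulated CPU time into user and system ticks; each value goes through cputime64_to_clock_t(), so it is reported in USER_HZ units. Below is a minimal user-space sketch of reading it. The mount point /cgroup/cpuacct is only an assumption, since the cpuacct controller can be mounted anywhere.

/* Sketch: read the per-group user/system ticks exposed by cpuacct.stat.
 * Assumes the cpuacct controller is mounted at /cgroup/cpuacct (hypothetical
 * path; adjust to wherever the hierarchy is actually mounted). */
#include <stdio.h>

int main(void)
{
    FILE *f = fopen("/cgroup/cpuacct/cpuacct.stat", "r");
    char name[16];
    unsigned long long ticks;

    if (!f) {
        perror("cpuacct.stat");
        return 1;
    }
    /* Two lines are expected: "user <ticks>" and "system <ticks>". */
    while (fscanf(f, "%15s %llu", name, &ticks) == 2)
        printf("%s time: %llu ticks (USER_HZ)\n", name, ticks);
    fclose(f);
    return 0;
}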
@@ -55,7 +55,7 @@ static int convert_prio(int prio)
 * cpupri_find - find the best (lowest-pri) CPU in the system
 * @cp: The cpupri context
 * @p: The task
 * @lowest_mask: A mask to fill in with selected CPUs
 * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
 *
 * Note: This function returns the recommended CPUs as calculated during the
 * current invokation. By the time the call returns, the CPUs may have in

@@ -81,7 +81,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
        if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
            continue;

        cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
        if (lowest_mask)
            cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
        return 1;
    }
@@ -948,20 +948,15 @@ static int select_task_rq_rt(struct task_struct *p, int sync)

static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
    cpumask_var_t mask;

    if (rq->curr->rt.nr_cpus_allowed == 1)
        return;

    if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
    if (p->rt.nr_cpus_allowed != 1
        && cpupri_find(&rq->rd->cpupri, p, NULL))
        return;

    if (p->rt.nr_cpus_allowed != 1
        && cpupri_find(&rq->rd->cpupri, p, mask))
        goto free;

    if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask))
        goto free;
    if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
        return;

    /*
     * There appears to be other cpus that can accept

@@ -970,8 +965,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
     */
    requeue_task_rt(rq, p, 1);
    resched_task(rq->curr);
free:
    free_cpumask_var(mask);
}

#endif /* CONFIG_SMP */
@@ -165,98 +165,12 @@ void softlockup_tick(void)
        panic("softlockup: hung tasks");
}

/*
 * Have a reasonable limit on the number of tasks checked:
 */
unsigned long __read_mostly sysctl_hung_task_check_count = 1024;

/*
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;

unsigned long __read_mostly sysctl_hung_task_warnings = 10;

/*
 * Only do the hung-tasks check on one CPU:
 */
static int check_cpu __read_mostly = -1;

static void check_hung_task(struct task_struct *t, unsigned long now)
{
    unsigned long switch_count = t->nvcsw + t->nivcsw;

    if (t->flags & PF_FROZEN)
        return;

    if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
        t->last_switch_count = switch_count;
        t->last_switch_timestamp = now;
        return;
    }
    if ((long)(now - t->last_switch_timestamp) <
                sysctl_hung_task_timeout_secs)
        return;
    if (!sysctl_hung_task_warnings)
        return;
    sysctl_hung_task_warnings--;

    /*
     * Ok, the task did not get scheduled for more than 2 minutes,
     * complain:
     */
    printk(KERN_ERR "INFO: task %s:%d blocked for more than "
            "%ld seconds.\n", t->comm, t->pid,
            sysctl_hung_task_timeout_secs);
    printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
            " disables this message.\n");
    sched_show_task(t);
    __debug_show_held_locks(t);

    t->last_switch_timestamp = now;
    touch_nmi_watchdog();

    if (softlockup_panic)
        panic("softlockup: blocked tasks");
}

/*
 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
 * a really long time (120 seconds). If that happens, print out
 * a warning.
 */
static void check_hung_uninterruptible_tasks(int this_cpu)
{
    int max_count = sysctl_hung_task_check_count;
    unsigned long now = get_timestamp(this_cpu);
    struct task_struct *g, *t;

    /*
     * If the system crashed already then all bets are off,
     * do not report extra hung tasks:
     */
    if (test_taint(TAINT_DIE) || did_panic)
        return;

    read_lock(&tasklist_lock);
    do_each_thread(g, t) {
        if (!--max_count)
            goto unlock;
        /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
        if (t->state == TASK_UNINTERRUPTIBLE)
            check_hung_task(t, now);
    } while_each_thread(g, t);
unlock:
    read_unlock(&tasklist_lock);
}

/*
 * The watchdog thread - runs every second and touches the timestamp.
 */
static int watchdog(void *__bind_cpu)
{
    struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
    int this_cpu = (long)__bind_cpu;

    sched_setscheduler(current, SCHED_FIFO, &param);

@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
        if (kthread_should_stop())
            break;

        if (this_cpu == check_cpu) {
            if (sysctl_hung_task_timeout_secs)
                check_hung_uninterruptible_tasks(this_cpu);
        }

        set_current_state(TASK_INTERRUPTIBLE);
    }
    __set_current_state(TASK_RUNNING);

@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
        break;
    case CPU_ONLINE:
    case CPU_ONLINE_FROZEN:
        check_cpu = cpumask_any(cpu_online_mask);
        wake_up_process(per_cpu(watchdog_task, hotcpu));
        break;
#ifdef CONFIG_HOTPLUG_CPU
    case CPU_DOWN_PREPARE:
    case CPU_DOWN_PREPARE_FROZEN:
        if (hotcpu == check_cpu) {
            /* Pick any other online cpu. */
            check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
        }
        break;

    case CPU_UP_CANCELED:
    case CPU_UP_CANCELED_FROZEN:
        if (!per_cpu(watchdog_task, hotcpu))
@@ -101,6 +101,7 @@ static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static unsigned long one_ul = 1;
static int one_hundred = 100;
static int one_thousand = 1000;

/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;

@@ -813,6 +814,19 @@ static struct ctl_table kern_table[] = {
        .extra1 = &neg_one,
        .extra2 = &sixty,
    },
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
    {
        .ctl_name = CTL_UNNUMBERED,
        .procname = "hung_task_panic",
        .data = &sysctl_hung_task_panic,
        .maxlen = sizeof(int),
        .mode = 0644,
        .proc_handler = &proc_dointvec_minmax,
        .strategy = &sysctl_intvec,
        .extra1 = &zero,
        .extra2 = &one,
    },
    {
        .ctl_name = CTL_UNNUMBERED,
        .procname = "hung_task_check_count",

@@ -828,7 +842,7 @@ static struct ctl_table kern_table[] = {
        .data = &sysctl_hung_task_timeout_secs,
        .maxlen = sizeof(unsigned long),
        .mode = 0644,
        .proc_handler = &proc_doulongvec_minmax,
        .proc_handler = &proc_dohung_task_timeout_secs,
        .strategy = &sysctl_intvec,
    },
    {

@@ -1026,6 +1040,28 @@ static struct ctl_table vm_table[] = {
        .mode = 0444 /* read-only*/,
        .proc_handler = &proc_dointvec,
    },
    {
        .ctl_name = CTL_UNNUMBERED,
        .procname = "nr_pdflush_threads_min",
        .data = &nr_pdflush_threads_min,
        .maxlen = sizeof nr_pdflush_threads_min,
        .mode = 0644 /* read-write */,
        .proc_handler = &proc_dointvec_minmax,
        .strategy = &sysctl_intvec,
        .extra1 = &one,
        .extra2 = &nr_pdflush_threads_max,
    },
    {
        .ctl_name = CTL_UNNUMBERED,
        .procname = "nr_pdflush_threads_max",
        .data = &nr_pdflush_threads_max,
        .maxlen = sizeof nr_pdflush_threads_max,
        .mode = 0644 /* read-write */,
        .proc_handler = &proc_dointvec_minmax,
        .strategy = &sysctl_intvec,
        .extra1 = &nr_pdflush_threads_min,
        .extra2 = &one_thousand,
    },
    {
        .ctl_name = VM_SWAPPINESS,
        .procname = "swappiness",
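The hung-task entries added to kern_table above surface under /proc/sys/kernel/. A small read-only sketch follows; it assumes a kernel built with CONFIG_DETECT_HUNG_TASK, otherwise the files simply do not exist and are skipped.

/* Sketch: dump the hung-task detector knobs registered in kern_table[].
 * Only present on kernels built with CONFIG_DETECT_HUNG_TASK. */
#include <stdio.h>

static const char *knobs[] = {
    "hung_task_panic",
    "hung_task_check_count",
    "hung_task_timeout_secs",
    "hung_task_warnings",
};

int main(void)
{
    char path[128], value[64];
    unsigned int i;

    for (i = 0; i < sizeof(knobs) / sizeof(knobs[0]); i++) {
        FILE *f;

        snprintf(path, sizeof(path), "/proc/sys/kernel/%s", knobs[i]);
        f = fopen(path, "r");
        if (!f)
            continue;   /* knob not present on this kernel */
        if (fgets(value, sizeof(value), f))
            printf("%s = %s", knobs[i], value);
        fclose(f);
    }
    return 0;
}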
@@ -531,10 +531,13 @@ static void __init_timer(struct timer_list *timer,
}

/**
 * init_timer - initialize a timer.
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer() must be done to a timer prior calling *any* of the
 * init_timer_key() must be done to a timer prior calling *any* of the
 * other timer functions.
 */
void init_timer_key(struct timer_list *timer,
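init_timer() is now a macro that expands to init_timer_key() with a per-call-site lockdep key and name, so existing callers keep the same pattern. A minimal module-style sketch of that pattern is below; the function and variable names are made up for illustration, and the callback signature is the 2.6.30-era one that takes an unsigned long cookie.

/* Sketch of ordinary init_timer()/mod_timer() usage, which the
 * init_timer_key() change keeps source-compatible. */
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list demo_timer;

static void demo_timer_fn(unsigned long data)
{
    pr_info("demo timer fired, cookie=%lu\n", data);
}

static int __init demo_init(void)
{
    init_timer(&demo_timer);    /* expands to init_timer_key() */
    demo_timer.function = demo_timer_fn;
    demo_timer.data = 42;
    mod_timer(&demo_timer, jiffies + HZ);   /* fire in about one second */
    return 0;
}

static void __exit demo_exit(void)
{
    del_timer_sync(&demo_timer);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");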
@@ -1377,12 +1377,12 @@ static int blk_trace_str2mask(const char *str)
{
    int i;
    int mask = 0;
    char *s, *token;
    char *buf, *s, *token;

    s = kstrdup(str, GFP_KERNEL);
    if (s == NULL)
    buf = kstrdup(str, GFP_KERNEL);
    if (buf == NULL)
        return -ENOMEM;
    s = strstrip(s);
    s = strstrip(buf);

    while (1) {
        token = strsep(&s, ",");

@@ -1403,7 +1403,7 @@ static int blk_trace_str2mask(const char *str)
            break;
        }
    }
    kfree(s);
    kfree(buf);

    return mask;
}
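The blk_trace_str2mask() change fixes a freed-pointer bug: strstrip() can return a pointer advanced past leading whitespace, so freeing its return value instead of the original kstrdup() result hands the allocator a pointer it never handed out. A user-space analog of the same pattern is below, with a tiny stand-in for strstrip(); only the free-the-original-pointer point carries over.

/* User-space analog of the blk_trace_str2mask() fix: always free the
 * pointer returned by the allocator, not the stripped alias. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Minimal stand-in for the kernel's strstrip(): trims trailing blanks in
 * place and returns a pointer past any leading blanks. */
static char *strip(char *str)
{
    char *end = str + strlen(str);

    while (end > str && isspace((unsigned char)end[-1]))
        *--end = '\0';
    while (isspace((unsigned char)*str))
        str++;
    return str;
}

int main(void)
{
    char *buf = strdup("   barrier,read ");  /* plays the role of kstrdup() */
    char *s = strip(buf);   /* s may point into the middle of buf */

    printf("token string: '%s'\n", s);
    free(buf);              /* freeing 's' here would be the old bug */
    return 0;
}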
@@ -3268,19 +3268,13 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)

    info->tr = &global_trace;
    info->cpu = cpu;
    info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
    info->spare = NULL;
    /* Force reading ring buffer for first read */
    info->read = (unsigned int)-1;
    if (!info->spare)
        goto out;

    filp->private_data = info;

    return 0;

out:
    kfree(info);
    return -ENOMEM;
    return nonseekable_open(inode, filp);
}

static ssize_t

@@ -3295,6 +3289,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
    if (!count)
        return 0;

    if (!info->spare)
        info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
    if (!info->spare)
        return -ENOMEM;

    /* Do we have previous read data to read? */
    if (info->read < PAGE_SIZE)
        goto read;

@@ -3333,7 +3332,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
{
    struct ftrace_buffer_info *info = file->private_data;

    ring_buffer_free_read_page(info->tr->buffer, info->spare);
    if (info->spare)
        ring_buffer_free_read_page(info->tr->buffer, info->spare);
    kfree(info);

    return 0;

@@ -3419,14 +3419,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
    int size, i;
    size_t ret;

    /*
     * We can't seek on a buffer input
     */
    if (unlikely(*ppos))
        return -ESPIPE;
    if (*ppos & (PAGE_SIZE - 1)) {
        WARN_ONCE(1, "Ftrace: previous read must page-align\n");
        return -EINVAL;
    }

    if (len & (PAGE_SIZE - 1)) {
        WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
        if (len < PAGE_SIZE)
            return -EINVAL;
        len &= PAGE_MASK;
    }

    for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) {
    for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
        struct page *page;
        int r;

@@ -3465,6 +3470,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
        spd.partial[i].offset = 0;
        spd.partial[i].private = (unsigned long)ref;
        spd.nr_pages++;
        *ppos += PAGE_SIZE;
    }

    spd.nr_pages = i;
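tracing_buffers_read() now allocates its spare ring-buffer page lazily, and the splice path insists on page-aligned offsets and lengths. A user-space sketch that pulls one page-sized chunk from a per-CPU raw buffer follows; the debugfs mount point /sys/kernel/debug is an assumption (adjust if debugfs is mounted elsewhere), and the read can block until trace data is available.

/* Sketch: read one page-sized chunk from a per-CPU raw trace buffer.
 * Assumes debugfs at /sys/kernel/debug and that tracing is enabled;
 * the read may block until the ring buffer has data. */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>

int main(void)
{
    const char *path = "/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw";
    long page = sysconf(_SC_PAGESIZE);
    char *buf = malloc(page);
    int fd = open(path, O_RDONLY);
    ssize_t n;

    if (fd < 0 || !buf) {
        perror(path);
        return 1;
    }
    /* Data comes back in ring-buffer page units; a short read just means
     * less data was available. */
    n = read(fd, buf, page);
    printf("read %zd bytes of raw ring-buffer data\n", n);
    free(buf);
    close(fd);
    return 0;
}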
@@ -1,5 +1,5 @@
#include <trace/syscall.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <asm/syscall.h>

#include "trace_output.h"
@@ -966,20 +966,20 @@ undo:
}

#ifdef CONFIG_SMP
static struct workqueue_struct *work_on_cpu_wq __read_mostly;

struct work_for_cpu {
    struct work_struct work;
    struct completion completion;
    long (*fn)(void *);
    void *arg;
    long ret;
};

static void do_work_for_cpu(struct work_struct *w)
static int do_work_for_cpu(void *_wfc)
{
    struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);

    struct work_for_cpu *wfc = _wfc;
    wfc->ret = wfc->fn(wfc->arg);
    complete(&wfc->completion);
    return 0;
}

/**

@@ -990,17 +990,23 @@ static void do_work_for_cpu(struct work_struct *w)
 *
 * This will return the value @fn returns.
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from completing.
 */
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
    struct work_for_cpu wfc;

    INIT_WORK(&wfc.work, do_work_for_cpu);
    wfc.fn = fn;
    wfc.arg = arg;
    queue_work_on(cpu, work_on_cpu_wq, &wfc.work);
    flush_work(&wfc.work);
    struct task_struct *sub_thread;
    struct work_for_cpu wfc = {
        .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
        .fn = fn,
        .arg = arg,
    };

    sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
    if (IS_ERR(sub_thread))
        return PTR_ERR(sub_thread);
    kthread_bind(sub_thread, cpu);
    wake_up_process(sub_thread);
    wait_for_completion(&wfc.completion);
    return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);

@@ -1016,8 +1022,4 @@ void __init init_workqueues(void)
    hotcpu_notifier(workqueue_cpu_callback, 0);
    keventd_wq = create_workqueue("events");
    BUG_ON(!keventd_wq);
#ifdef CONFIG_SMP
    work_on_cpu_wq = create_workqueue("work_on_cpu");
    BUG_ON(!work_on_cpu_wq);
#endif
}
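work_on_cpu() stays exported GPL-only, but it now runs @fn in a freshly created kthread bound to the target CPU instead of a dedicated workqueue, which is why the updated kernel-doc warns about locks that could stall @fn. A module-style sketch of a caller is below; the callback name is made up for illustration, and get_online_cpus() covers the "cpu must not go offline" requirement.

/* Sketch of a work_on_cpu() caller from module context. */
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

static long read_something(void *arg)
{
    /* Runs in a kthread bound to the requested CPU. */
    return (long)raw_smp_processor_id();
}

static int __init demo_init(void)
{
    unsigned int cpu;
    long ret;

    get_online_cpus();
    cpu = cpumask_first(cpu_online_mask);
    ret = work_on_cpu(cpu, read_something, NULL);
    put_online_cpus();

    pr_info("work_on_cpu() callback ran on CPU %ld\n", ret);
    return ret < 0 ? (int)ret : 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");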