#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
+#include <linux/tick.h>
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <linux/perf_event.h>
+#include <linux/kthread.h>
/*
* The run state of the lockup detectors is controlled by the content of the
#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+/*
+ * When non-zero, a detected hard lockup additionally triggers a backtrace
+ * of all other CPUs (performed only once). It can be set at boot time via
+ * "hardlockup_all_cpu_backtrace=". On !SMP builds it is the constant 0.
+ */
+int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
#else
#define sysctl_softlockup_all_cpu_backtrace 0
+#define sysctl_hardlockup_all_cpu_backtrace 0
#endif
+/*
+ * Mask of cpus the watchdog may run on. It is initialised in
+ * lockup_detector_init() and can be changed via proc_watchdog_cpumask().
+ */
+static struct cpumask watchdog_cpumask __read_mostly;
+/*
+ * Raw bitmap of 'watchdog_cpumask'.
+ * NOTE(review): non-static, presumably so that the sysctl table defined
+ * elsewhere can point at it - confirm.
+ */
+unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
+/*
+ * Iterate 'cpu' over every cpu that is set in both cpu_online_mask and
+ * 'watchdog_cpumask'.
+ */
+#define for_each_watchdog_cpu(cpu) \
+ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
+
+/*
+ * The 'watchdog_running' variable is set to 1 when the watchdog threads
+ * are registered/started and is set to 0 when the watchdog threads are
+ * unregistered/stopped, so it is an indicator whether the threads exist.
+ */
static int __read_mostly watchdog_running;
+/*
+ * If a subsystem has a need to deactivate the watchdog temporarily, it
+ * can use the suspend/resume interface to achieve this. The content of
+ * the 'watchdog_suspended' variable reflects this state. Existing threads
+ * are parked/unparked by the lockup_detector_{suspend|resume} functions
+ * (see comment blocks pertaining to those functions for further details).
+ *
+ * 'watchdog_suspended' is a counter: lockup_detector_suspend() increments
+ * it for every successful request and lockup_detector_resume() decrements
+ * it, so a non-zero value means at least one suspend request is active.
+ *
+ * 'watchdog_suspended' also prevents threads from being registered/started
+ * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
+ * of 'watchdog_running' cannot change while the watchdog is deactivated
+ * temporarily (see related code in 'proc' handlers).
+ */
+static int __read_mostly watchdog_suspended;
+
static u64 __read_mostly sample_period;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
* Should we panic when a soft-lockup or hard-lockup occurs:
*/
#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static int hardlockup_panic =
+unsigned int __read_mostly hardlockup_panic =
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+static unsigned long hardlockup_allcpu_dumped;
/*
* We may not want to enable hard lockup detection by default in all cases,
* for example when running the kernel as a guest on a hypervisor. In these
return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+/*
+ * Handler for the "hardlockup_all_cpu_backtrace=" boot parameter.
+ *
+ * The argument is parsed as an integer; any non-zero value sets
+ * sysctl_hardlockup_all_cpu_backtrace to 1, zero clears it.
+ * Always returns 1.
+ */
+static int __init hardlockup_all_cpu_backtrace_setup(char *str)
+{
+	long requested = simple_strtol(str, NULL, 0);
+
+	sysctl_hardlockup_all_cpu_backtrace = requested ? 1 : 0;
+	return 1;
+}
+__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
#endif
/*
* do we care if a 0 races with a timestamp?
* all it means is the softlock check starts one cycle later
*/
- for_each_online_cpu(cpu)
+ for_each_watchdog_cpu(cpu)
per_cpu(watchdog_touch_ts, cpu) = 0;
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
/* watchdog detector functions */
+/*
+ * Compare this cpu's 'hrtimer_interrupts' count with the value saved by
+ * the previous call. An unchanged count is reported as a hard lockup
+ * (true); otherwise the new count is saved and false is returned.
+ */
-static int is_hardlockup(void)
+static bool is_hardlockup(void)
{
unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
- return 1;
+ return true;
__this_cpu_write(hrtimer_interrupts_saved, hrint);
- return 0;
+ return false;
}
#endif
{
unsigned long now = get_timestamp();
- if (watchdog_enabled & SOFT_WATCHDOG_ENABLED) {
+ if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){
/* Warn about unreasonable delays. */
if (time_after(now, touch_ts + get_softlockup_thresh()))
return now - touch_ts;
*/
printk_kill();
- if (hardlockup_panic) {
- panic("Watchdog detected hard LOCKUP on cpu %d",
- this_cpu);
- } else {
- raw_spin_lock(&watchdog_output_lock);
- WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
- this_cpu);
- raw_spin_unlock(&watchdog_output_lock);
- }
+ raw_spin_lock(&watchdog_output_lock);
+
+ pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ print_modules();
+ print_irqtrace_events(current);
+ if (regs)
+ show_regs(regs);
+ else
+ dump_stack();
+
+ /*
+ * Perform all-CPU dump only once to avoid multiple hardlockups
+ * generating interleaving traces
+ */
+ if (sysctl_hardlockup_all_cpu_backtrace &&
+ !test_and_set_bit(0, &hardlockup_allcpu_dumped))
+ trigger_allbutself_cpu_backtrace();
+
+ raw_spin_unlock(&watchdog_output_lock);
+ if (hardlockup_panic)
+ nmi_panic(regs, "Hard LOCKUP");
__this_cpu_write(hard_watchdog_warn, true);
return;
static int watchdog_nmi_enable(unsigned int cpu);
static void watchdog_nmi_disable(unsigned int cpu);
+static int watchdog_enable_all_cpus(void);
+static void watchdog_disable_all_cpus(void);
+
/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
}
}
-void watchdog_nmi_enable_all(void)
-{
- int cpu;
-
- mutex_lock(&watchdog_proc_mutex);
-
- if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
- goto unlock;
-
- get_online_cpus();
- for_each_online_cpu(cpu)
- watchdog_nmi_enable(cpu);
- put_online_cpus();
-
-unlock:
- mutex_unlock(&watchdog_proc_mutex);
-}
-
-void watchdog_nmi_disable_all(void)
-{
- int cpu;
-
- mutex_lock(&watchdog_proc_mutex);
-
- if (!watchdog_running)
- goto unlock;
-
- get_online_cpus();
- for_each_online_cpu(cpu)
- watchdog_nmi_disable(cpu);
- put_online_cpus();
-
-unlock:
- mutex_unlock(&watchdog_proc_mutex);
-}
#else
static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(unsigned int cpu) { return; }
-void watchdog_nmi_enable_all(void) {}
-void watchdog_nmi_disable_all(void) {}
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
static struct smp_hotplug_thread watchdog_threads = {
.unpark = watchdog_enable,
};
-static void restart_watchdog_hrtimer(void *info)
+/*
+ * park all watchdog threads that are specified in 'watchdog_cpumask'
+ *
+ * This function returns an error if kthread_park() of a watchdog thread
+ * fails. In this situation, the watchdog threads of some CPUs can already
+ * be parked and the watchdog threads of other CPUs can still be runnable.
+ * Callers are expected to handle this special condition as appropriate in
+ * their context.
+ *
+ * Returns 0 if every kthread_park() call succeeded, otherwise the value
+ * returned by the first failing kthread_park() call.
+ *
+ * This function may only be called in a context that is protected against
+ * races with CPU hotplug - for example, via get_online_cpus().
+ */
+static int watchdog_park_threads(void)
{
- struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
- int ret;
+ int cpu, ret = 0;
+
+ for_each_watchdog_cpu(cpu) {
+ ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
+ /* Stop at the first failure; nothing is unparked again here. */
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Unpark the watchdog thread of every cpu visited by
+ * for_each_watchdog_cpu(), i.e. the online cpus in 'watchdog_cpumask'.
+ *
+ * The caller must be protected against races with CPU hotplug - for
+ * example, via get_online_cpus().
+ */
+static void watchdog_unpark_threads(void)
+{
+	int cpu;
+
+	for_each_watchdog_cpu(cpu) {
+		kthread_unpark(per_cpu(softlockup_watchdog, cpu));
+	}
+}
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ *
+ * Returns 0 on success. In that case the suspend count is raised and the
+ * CPU hotplug reference taken here stays held until the matching
+ * lockup_detector_resume() call releases it.
+ *
+ * On failure the error from watchdog_park_threads() is returned and both
+ * lockup detectors are disabled. The hotplug reference is dropped again
+ * before returning: lockup_detector_resume() must not be called after a
+ * failed suspend (it would decrement a count that was never raised), so
+ * nobody else would release the reference.
+ */
+int lockup_detector_suspend(void)
+{
+ int ret = 0;
+
+ get_online_cpus();
+ mutex_lock(&watchdog_proc_mutex);
/*
- * No need to cancel and restart hrtimer if it is currently executing
- * because it will reprogram itself with the new period now.
- * We should never see it unqueued here because we are running per-cpu
- * with interrupts disabled.
+ * Multiple suspend requests can be active in parallel (counted by
+ * the 'watchdog_suspended' variable). If the watchdog threads are
+ * running, the first caller takes care that they will be parked.
+ * The state of 'watchdog_running' cannot change while a suspend
+ * request is active (see related code in 'proc' handlers).
*/
- ret = hrtimer_try_to_cancel(hrtimer);
- if (ret == 1)
- hrtimer_start(hrtimer, ns_to_ktime(sample_period),
- HRTIMER_MODE_REL_PINNED);
+ if (watchdog_running && !watchdog_suspended)
+ ret = watchdog_park_threads();
+
+ if (ret == 0)
+ watchdog_suspended++;
+ else {
+ watchdog_disable_all_cpus();
+ pr_err("Failed to suspend lockup detectors, disabled\n");
+ watchdog_enabled = 0;
+ }
+
+ mutex_unlock(&watchdog_proc_mutex);
+
+ /*
+ * A failed suspend is not paired with lockup_detector_resume(), so
+ * release the hotplug reference here instead of leaking it.
+ */
+ if (ret)
+ put_online_cpus();
+
+ return ret;
}
-static void update_watchdog(int cpu)
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ *
+ * Drops one suspend request from 'watchdog_suspended' and then releases
+ * the CPU hotplug reference via put_online_cpus(), pairing with the
+ * get_online_cpus() call made in lockup_detector_suspend(). The count is
+ * decremented unconditionally, so this must only be called to undo a
+ * suspend request that was actually counted.
+ */
+void lockup_detector_resume(void)
{
+ mutex_lock(&watchdog_proc_mutex);
+
+ watchdog_suspended--;
/*
- * Make sure that perf event counter will adopt to a new
- * sampling period. Updating the sampling period directly would
- * be much nicer but we do not have an API for that now so
- * let's use a big hammer.
- * Hrtimer will adopt the new period on the next tick but this
- * might be late already so we have to restart the timer as well.
+ * The watchdog threads are unparked if they were previously running
+ * and if there is no more active suspend request.
*/
- watchdog_nmi_disable(cpu);
- smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1);
- watchdog_nmi_enable(cpu);
+ if (watchdog_running && !watchdog_suspended)
+ watchdog_unpark_threads();
+
+ mutex_unlock(&watchdog_proc_mutex);
+ put_online_cpus();
}
+/*
+ * Park and then unpark every watchdog thread visited by
+ * for_each_watchdog_cpu().
+ *
+ * Returns 0 on success. If watchdog_park_threads() fails, its error is
+ * returned and nothing is unparked here, so some threads may be left
+ * parked for the caller to deal with.
+ *
+ * NOTE(review): presumably the park/unpark cycle is what makes the threads
+ * pick up changed settings through their unpark callback - confirm.
+ */
-static void update_watchdog_all_cpus(void)
+static int update_watchdog_all_cpus(void)
{
- int cpu;
+ int ret;
- get_online_cpus();
- for_each_online_cpu(cpu)
- update_watchdog(cpu);
- put_online_cpus();
+ ret = watchdog_park_threads();
+ if (ret)
+ return ret;
+
+ watchdog_unpark_threads();
+
+ return 0;
}
static int watchdog_enable_all_cpus(void)
int err = 0;
if (!watchdog_running) {
- err = smpboot_register_percpu_thread(&watchdog_threads);
+ err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
+ &watchdog_cpumask);
if (err)
pr_err("Failed to create watchdog threads, disabled\n");
else
* Enable/disable the lockup detectors or
* change the sample period 'on the fly'.
*/
- update_watchdog_all_cpus();
+ err = update_watchdog_all_cpus();
+
+ if (err) {
+ watchdog_disable_all_cpus();
+ pr_err("Failed to update lockup detectors, disabled\n");
+ }
}
+ if (err)
+ watchdog_enabled = 0;
+
return err;
}
-/* prepare/enable/disable routines */
-/* sysctl functions */
-#ifdef CONFIG_SYSCTL
static void watchdog_disable_all_cpus(void)
{
if (watchdog_running) {
}
}
+#ifdef CONFIG_SYSCTL
+
/*
* Update the run state of the lockup detectors.
*/
int err, old, new;
int *watchdog_param = (int *)table->data;
+ get_online_cpus();
mutex_lock(&watchdog_proc_mutex);
+ if (watchdog_suspended) {
+ /* no parameter changes allowed while watchdog is suspended */
+ err = -EAGAIN;
+ goto out;
+ }
+
/*
* If the parameter is being read return the state of the corresponding
* bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
} while (cmpxchg(&watchdog_enabled, old, new) != old);
/*
- * Update the run state of the lockup detectors.
- * Restore 'watchdog_enabled' on failure.
+ * Update the run state of the lockup detectors. There is _no_
+ * need to check the value returned by proc_watchdog_update()
+ * and to restore the previous value of 'watchdog_enabled' as
+ * both lockup detectors are disabled if proc_watchdog_update()
+ * returns an error.
*/
+ if (old == new)
+ goto out;
+
err = proc_watchdog_update();
- if (err)
- watchdog_enabled = old;
}
out:
mutex_unlock(&watchdog_proc_mutex);
+ put_online_cpus();
return err;
}
int proc_watchdog_thresh(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int err, old;
+ int err, old, new;
+ get_online_cpus();
mutex_lock(&watchdog_proc_mutex);
+ if (watchdog_suspended) {
+ /* no parameter changes allowed while watchdog is suspended */
+ err = -EAGAIN;
+ goto out;
+ }
+
old = ACCESS_ONCE(watchdog_thresh);
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
goto out;
/*
- * Update the sample period.
- * Restore 'watchdog_thresh' on failure.
+ * Update the sample period. Restore on failure.
*/
+ new = ACCESS_ONCE(watchdog_thresh);
+ if (old == new)
+ goto out;
+
set_sample_period();
err = proc_watchdog_update();
- if (err)
+ if (err) {
watchdog_thresh = old;
+ set_sample_period();
+ }
out:
mutex_unlock(&watchdog_proc_mutex);
+ put_online_cpus();
return err;
}
+
+/*
+ * sysctl handler for the watchdog cpumask.
+ *
+ * The mask describes the cpus the watchdog is allowed to run on, not the
+ * cpus it is running on right now, so a user may include cpus that have
+ * not been brought online yet.
+ *
+ * Returns -EAGAIN while the watchdog is suspended, otherwise the result
+ * of proc_do_large_bitmap(). A failure to propagate a new mask to the
+ * watchdog threads is only logged.
+ */
+int proc_watchdog_cpumask(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int err;
+
+	get_online_cpus();
+	mutex_lock(&watchdog_proc_mutex);
+
+	/* no parameter changes allowed while watchdog is suspended */
+	if (watchdog_suspended) {
+		err = -EAGAIN;
+		goto unlock;
+	}
+
+	err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
+	if (err || !write)
+		goto unlock;
+
+	/* Strip impossible cpus so that the sysctl output stays clean. */
+	cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
+
+	if (!watchdog_running)
+		goto unlock;
+
+	/*
+	 * A failure here would mean that a temporary cpumask could not be
+	 * allocated, so there is little else we could do to improve the
+	 * situation beyond reporting it.
+	 */
+	if (smpboot_update_cpumask_percpu_thread(&watchdog_threads,
+						 &watchdog_cpumask))
+		pr_err("cpumask update failed\n");
+
+unlock:
+	mutex_unlock(&watchdog_proc_mutex);
+	put_online_cpus();
+	return err;
+}
+
#endif /* CONFIG_SYSCTL */
+/*
+ * Boot-time setup of the lockup detectors: compute the sample period,
+ * choose the default 'watchdog_cpumask' and, if any detector is enabled
+ * in 'watchdog_enabled', start the watchdog on all cpus.
+ */
void __init lockup_detector_init(void)
{
set_sample_period();
+ /*
+ * Default mask: housekeeping_mask when nohz_full is enabled, so that
+ * nohz_full cores are left out; every possible cpu otherwise.
+ */
+#ifdef CONFIG_NO_HZ_FULL
+ if (tick_nohz_full_enabled()) {
+ pr_info("Disabling watchdog on nohz_full cores by default\n");
+ cpumask_copy(&watchdog_cpumask, housekeeping_mask);
+ } else
+ cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
+#else
+ cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
+#endif
+
if (watchdog_enabled)
watchdog_enable_all_cpus();
}