These changes are the raw update to linux-4.4.6-rt14.
[kvmfornfv.git] kernel/kernel/watchdog.c
index 3b8e827..201775b 100644
 #include <linux/sysctl.h>
 #include <linux/smpboot.h>
 #include <linux/sched/rt.h>
+#include <linux/tick.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
 #include <linux/perf_event.h>
+#include <linux/kthread.h>
 
 /*
  * The run state of the lockup detectors is controlled by the content of the
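The two new includes support the additions below: <linux/tick.h> provides tick_nohz_full_enabled() and housekeeping_mask, used by lockup_detector_init() at the end of this patch, and <linux/kthread.h> provides kthread_park()/kthread_unpark(), used by the new watchdog_park_threads()/watchdog_unpark_threads() helpers.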
@@ -55,11 +57,38 @@ int __read_mostly watchdog_thresh = 10;
 
 #ifdef CONFIG_SMP
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
 #else
 #define sysctl_softlockup_all_cpu_backtrace 0
+#define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
+static struct cpumask watchdog_cpumask __read_mostly;
+unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
+/* Helper for online, unparked cpus. */
+#define for_each_watchdog_cpu(cpu) \
+       for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
+
+/*
+ * The 'watchdog_running' variable is set to 1 when the watchdog threads
+ * are registered/started and is set to 0 when the watchdog threads are
+ * unregistered/stopped, so it is an indicator whether the threads exist.
+ */
 static int __read_mostly watchdog_running;
+/*
+ * If a subsystem has a need to deactivate the watchdog temporarily, it
+ * can use the suspend/resume interface to achieve this. The content of
+ * the 'watchdog_suspended' variable reflects this state. Existing threads
+ * are parked/unparked by the lockup_detector_{suspend|resume} functions
+ * (see comment blocks pertaining to those functions for further details).
+ *
+ * 'watchdog_suspended' also prevents threads from being registered/started
+ * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
+ * of 'watchdog_running' cannot change while the watchdog is deactivated
+ * temporarily (see related code in 'proc' handlers).
+ */
+static int __read_mostly watchdog_suspended;
+
 static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
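The suspend/resume interface mentioned in the comment above is implemented further down in this patch as lockup_detector_suspend() and lockup_detector_resume(). A minimal sketch of how a subsystem might pair the two calls (the caller below is hypothetical, for illustration only):

        /*
         * Hypothetical caller: temporarily park the watchdog threads around
         * work that would otherwise trigger false lockup reports.
         */
        static int quiesce_and_do_work(void)
        {
                int err;

                err = lockup_detector_suspend();  /* parks the watchdog threads */
                if (err)
                        return err;               /* detectors were disabled on failure */

                /* ... long-running, watchdog-unfriendly work ... */

                lockup_detector_resume();         /* unparks them again */
                return 0;
        }

Note that lockup_detector_resume() is only called when the suspend succeeded; on failure the patch disables the lockup detectors entirely and reports that through the return value.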
@@ -83,8 +112,9 @@ static unsigned long soft_lockup_nmi_warn;
  * Should we panic when a soft-lockup or hard-lockup occurs:
  */
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
-static int hardlockup_panic =
+unsigned int __read_mostly hardlockup_panic =
                        CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+static unsigned long hardlockup_allcpu_dumped;
 /*
  * We may not want to enable hard lockup detection by default in all cases,
  * for example when running the kernel as a guest on a hypervisor. In these
@@ -146,6 +176,13 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str)
        return 1;
 }
 __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+static int __init hardlockup_all_cpu_backtrace_setup(char *str)
+{
+       sysctl_hardlockup_all_cpu_backtrace =
+               !!simple_strtol(str, NULL, 0);
+       return 1;
+}
+__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
 #endif
 
 /*
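The new hardlockup_all_cpu_backtrace= boot parameter mirrors the existing softlockup one: any non-zero value (via the !!simple_strtol() conversion) enables it, and the hard-lockup path below then dumps backtraces of all other CPUs through trigger_allbutself_cpu_backtrace() in addition to reporting the locked-up CPU.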
@@ -207,7 +244,7 @@ void touch_all_softlockup_watchdogs(void)
         * do we care if a 0 races with a timestamp?
         * all it means is the softlock check starts one cycle later
         */
-       for_each_online_cpu(cpu)
+       for_each_watchdog_cpu(cpu)
                per_cpu(watchdog_touch_ts, cpu) = 0;
 }
 
@@ -236,15 +273,15 @@ void touch_softlockup_watchdog_sync(void)
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
-static int is_hardlockup(void)
+static bool is_hardlockup(void)
 {
        unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
        if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
-               return 1;
+               return true;
 
        __this_cpu_write(hrtimer_interrupts_saved, hrint);
-       return 0;
+       return false;
 }
 #endif
 
@@ -252,7 +289,7 @@ static int is_softlockup(unsigned long touch_ts)
 {
        unsigned long now = get_timestamp();
 
-       if (watchdog_enabled & SOFT_WATCHDOG_ENABLED) {
+       if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){
                /* Warn about unreasonable delays. */
                if (time_after(now, touch_ts + get_softlockup_thresh()))
                        return now - touch_ts;
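The additional watchdog_thresh check means a threshold of 0 now makes is_softlockup() report nothing at all, instead of comparing timestamps against a zero-length window (get_softlockup_thresh() would also be 0) and flagging spurious lockups.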
@@ -293,6 +330,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
         */
        if (is_hardlockup()) {
                int this_cpu = smp_processor_id();
+               struct pt_regs *regs = get_irq_regs();
 
                /* only print hardlockups once */
                if (__this_cpu_read(hard_watchdog_warn) == true)
@@ -303,15 +341,27 @@ static void watchdog_overflow_callback(struct perf_event *event,
                 */
                printk_kill();
 
-               if (hardlockup_panic) {
-                       panic("Watchdog detected hard LOCKUP on cpu %d",
-                             this_cpu);
-               } else {
-                       raw_spin_lock(&watchdog_output_lock);
-                       WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
-                            this_cpu);
-                       raw_spin_unlock(&watchdog_output_lock);
-               }
+               raw_spin_lock(&watchdog_output_lock);
+
+               pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+               print_modules();
+               print_irqtrace_events(current);
+               if (regs)
+                       show_regs(regs);
+               else
+                       dump_stack();
+
+               /*
+                * Perform all-CPU dump only once to avoid multiple hardlockups
+                * generating interleaving traces
+                */
+               if (sysctl_hardlockup_all_cpu_backtrace &&
+                               !test_and_set_bit(0, &hardlockup_allcpu_dumped))
+                       trigger_allbutself_cpu_backtrace();
+
+               raw_spin_unlock(&watchdog_output_lock);
+               if (hardlockup_panic)
+                       panic("Hard LOCKUP");
 
                __this_cpu_write(hard_watchdog_warn, true);
                return;
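The reworked branch always emits the diagnostics (modules, irqtrace, registers or a stack dump, and the optional one-shot all-CPU backtrace) under watchdog_output_lock first, and only afterwards panics if hardlockup_panic is set; previously a panicking configuration went straight to panic() without printing module or register state. The test_and_set_bit() on hardlockup_allcpu_dumped ensures that only the first hard lockup triggers the all-CPU dump, so several CPUs locking up at once cannot interleave their traces.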
@@ -330,6 +380,9 @@ static void watchdog_interrupt_count(void)
 static int watchdog_nmi_enable(unsigned int cpu);
 static void watchdog_nmi_disable(unsigned int cpu);
 
+static int watchdog_enable_all_cpus(void);
+static void watchdog_disable_all_cpus(void);
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -617,46 +670,9 @@ static void watchdog_nmi_disable(unsigned int cpu)
        }
 }
 
-void watchdog_nmi_enable_all(void)
-{
-       int cpu;
-
-       mutex_lock(&watchdog_proc_mutex);
-
-       if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
-               goto unlock;
-
-       get_online_cpus();
-       for_each_online_cpu(cpu)
-               watchdog_nmi_enable(cpu);
-       put_online_cpus();
-
-unlock:
-       mutex_unlock(&watchdog_proc_mutex);
-}
-
-void watchdog_nmi_disable_all(void)
-{
-       int cpu;
-
-       mutex_lock(&watchdog_proc_mutex);
-
-       if (!watchdog_running)
-               goto unlock;
-
-       get_online_cpus();
-       for_each_online_cpu(cpu)
-               watchdog_nmi_disable(cpu);
-       put_online_cpus();
-
-unlock:
-       mutex_unlock(&watchdog_proc_mutex);
-}
 #else
 static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
 static void watchdog_nmi_disable(unsigned int cpu) { return; }
-void watchdog_nmi_enable_all(void) {}
-void watchdog_nmi_disable_all(void) {}
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 static struct smp_hotplug_thread watchdog_threads = {
@@ -670,46 +686,107 @@ static struct smp_hotplug_thread watchdog_threads = {
        .unpark                 = watchdog_enable,
 };
 
-static void restart_watchdog_hrtimer(void *info)
+/*
+ * park all watchdog threads that are specified in 'watchdog_cpumask'
+ *
+ * This function returns an error if kthread_park() of a watchdog thread
+ * fails. In this situation, the watchdog threads of some CPUs can already
+ * be parked and the watchdog threads of other CPUs can still be runnable.
+ * Callers are expected to handle this special condition as appropriate in
+ * their context.
+ *
+ * This function may only be called in a context that is protected against
+ * races with CPU hotplug - for example, via get_online_cpus().
+ */
+static int watchdog_park_threads(void)
+{
+       int cpu, ret = 0;
+
+       for_each_watchdog_cpu(cpu) {
+               ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+
+/*
+ * unpark all watchdog threads that are specified in 'watchdog_cpumask'
+ *
+ * This function may only be called in a context that is protected against
+ * races with CPU hotplug - for example, via get_online_cpus().
+ */
+static void watchdog_unpark_threads(void)
 {
-       struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
-       int ret;
+       int cpu;
 
+       for_each_watchdog_cpu(cpu)
+               kthread_unpark(per_cpu(softlockup_watchdog, cpu));
+}
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ */
+int lockup_detector_suspend(void)
+{
+       int ret = 0;
+
+       get_online_cpus();
+       mutex_lock(&watchdog_proc_mutex);
        /*
-        * No need to cancel and restart hrtimer if it is currently executing
-        * because it will reprogram itself with the new period now.
-        * We should never see it unqueued here because we are running per-cpu
-        * with interrupts disabled.
+        * Multiple suspend requests can be active in parallel (counted by
+        * the 'watchdog_suspended' variable). If the watchdog threads are
+        * running, the first caller takes care that they will be parked.
+        * The state of 'watchdog_running' cannot change while a suspend
+        * request is active (see related code in 'proc' handlers).
         */
-       ret = hrtimer_try_to_cancel(hrtimer);
-       if (ret == 1)
-               hrtimer_start(hrtimer, ns_to_ktime(sample_period),
-                               HRTIMER_MODE_REL_PINNED);
+       if (watchdog_running && !watchdog_suspended)
+               ret = watchdog_park_threads();
+
+       if (ret == 0)
+               watchdog_suspended++;
+       else {
+               watchdog_disable_all_cpus();
+               pr_err("Failed to suspend lockup detectors, disabled\n");
+               watchdog_enabled = 0;
+       }
+
+       mutex_unlock(&watchdog_proc_mutex);
+
+       return ret;
 }
 
-static void update_watchdog(int cpu)
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ */
+void lockup_detector_resume(void)
 {
+       mutex_lock(&watchdog_proc_mutex);
+
+       watchdog_suspended--;
        /*
-        * Make sure that perf event counter will adopt to a new
-        * sampling period. Updating the sampling period directly would
-        * be much nicer but we do not have an API for that now so
-        * let's use a big hammer.
-        * Hrtimer will adopt the new period on the next tick but this
-        * might be late already so we have to restart the timer as well.
+        * The watchdog threads are unparked if they were previously running
+        * and if there is no more active suspend request.
         */
-       watchdog_nmi_disable(cpu);
-       smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1);
-       watchdog_nmi_enable(cpu);
+       if (watchdog_running && !watchdog_suspended)
+               watchdog_unpark_threads();
+
+       mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
 }
 
-static void update_watchdog_all_cpus(void)
+static int update_watchdog_all_cpus(void)
 {
-       int cpu;
+       int ret;
 
-       get_online_cpus();
-       for_each_online_cpu(cpu)
-               update_watchdog(cpu);
-       put_online_cpus();
+       ret = watchdog_park_threads();
+       if (ret)
+               return ret;
+
+       watchdog_unpark_threads();
+
+       return 0;
 }
 
 static int watchdog_enable_all_cpus(void)
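update_watchdog_all_cpus() is now simply a park/unpark cycle: unparking runs the .unpark hook of watchdog_threads (watchdog_enable(), visible above), and parking runs the corresponding watchdog_disable() hook (part of the same structure, not shown in this hunk), so each per-CPU thread re-reads sample_period and watchdog_enabled. This replaces the old scheme of disabling the perf event and restarting the hrtimer through a per-CPU IPI (restart_watchdog_hrtimer()).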
@@ -717,7 +794,8 @@ static int watchdog_enable_all_cpus(void)
        int err = 0;
 
        if (!watchdog_running) {
-               err = smpboot_register_percpu_thread(&watchdog_threads);
+               err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
+                                                            &watchdog_cpumask);
                if (err)
                        pr_err("Failed to create watchdog threads, disabled\n");
                else
@@ -727,15 +805,20 @@ static int watchdog_enable_all_cpus(void)
                 * Enable/disable the lockup detectors or
                 * change the sample period 'on the fly'.
                 */
-               update_watchdog_all_cpus();
+               err = update_watchdog_all_cpus();
+
+               if (err) {
+                       watchdog_disable_all_cpus();
+                       pr_err("Failed to update lockup detectors, disabled\n");
+               }
        }
 
+       if (err)
+               watchdog_enabled = 0;
+
        return err;
 }
 
-/* prepare/enable/disable routines */
-/* sysctl functions */
-#ifdef CONFIG_SYSCTL
 static void watchdog_disable_all_cpus(void)
 {
        if (watchdog_running) {
@@ -744,6 +827,8 @@ static void watchdog_disable_all_cpus(void)
        }
 }
 
+#ifdef CONFIG_SYSCTL
+
 /*
  * Update the run state of the lockup detectors.
  */
@@ -785,8 +870,15 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
        int err, old, new;
        int *watchdog_param = (int *)table->data;
 
+       get_online_cpus();
        mutex_lock(&watchdog_proc_mutex);
 
+       if (watchdog_suspended) {
+               /* no parameter changes allowed while watchdog is suspended */
+               err = -EAGAIN;
+               goto out;
+       }
+
        /*
         * If the parameter is being read return the state of the corresponding
         * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
@@ -820,15 +912,17 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
                } while (cmpxchg(&watchdog_enabled, old, new) != old);
 
                /*
-                * Update the run state of the lockup detectors.
-                * Restore 'watchdog_enabled' on failure.
+                * Update the run state of the lockup detectors. There is _no_
+                * need to check the value returned by proc_watchdog_update()
+                * and to restore the previous value of 'watchdog_enabled' as
+                * both lockup detectors are disabled if proc_watchdog_update()
+                * returns an error.
                 */
                err = proc_watchdog_update();
-               if (err)
-                       watchdog_enabled = old;
        }
 out:
        mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
        return err;
 }
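proc_watchdog_common() and the handlers below it now take get_online_cpus() before watchdog_proc_mutex, the same ordering used by lockup_detector_suspend(), so a parameter update can race neither with CPU hotplug nor with an active suspend request; while the watchdog is suspended, writes are simply refused with -EAGAIN.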
 
@@ -870,8 +964,15 @@ int proc_watchdog_thresh(struct ctl_table *table, int write,
 {
        int err, old;
 
+       get_online_cpus();
        mutex_lock(&watchdog_proc_mutex);
 
+       if (watchdog_suspended) {
+               /* no parameter changes allowed while watchdog is suspended */
+               err = -EAGAIN;
+               goto out;
+       }
+
        old = ACCESS_ONCE(watchdog_thresh);
        err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
@@ -879,23 +980,79 @@ int proc_watchdog_thresh(struct ctl_table *table, int write,
                goto out;
 
        /*
-        * Update the sample period.
-        * Restore 'watchdog_thresh' on failure.
+        * Update the sample period. Restore on failure.
         */
        set_sample_period();
        err = proc_watchdog_update();
-       if (err)
+       if (err) {
                watchdog_thresh = old;
+               set_sample_period();
+       }
 out:
        mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
        return err;
 }
+
+/*
+ * The cpumask is the mask of possible cpus that the watchdog can run
+ * on, not the mask of cpus it is actually running on.  This allows the
+ * user to specify a mask that will include cpus that have not yet
+ * been brought online, if desired.
+ */
+int proc_watchdog_cpumask(struct ctl_table *table, int write,
+                         void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int err;
+
+       get_online_cpus();
+       mutex_lock(&watchdog_proc_mutex);
+
+       if (watchdog_suspended) {
+               /* no parameter changes allowed while watchdog is suspended */
+               err = -EAGAIN;
+               goto out;
+       }
+
+       err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
+       if (!err && write) {
+               /* Remove impossible cpus to keep sysctl output cleaner. */
+               cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
+                           cpu_possible_mask);
+
+               if (watchdog_running) {
+                       /*
+                        * Failure would be due to being unable to allocate
+                        * a temporary cpumask, so we are likely not in a
+                        * position to do much else to make things better.
+                        */
+                       if (smpboot_update_cpumask_percpu_thread(
+                                   &watchdog_threads, &watchdog_cpumask) != 0)
+                               pr_err("cpumask update failed\n");
+               }
+       }
+out:
+       mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
+       return err;
+}
+
 #endif /* CONFIG_SYSCTL */
 
 void __init lockup_detector_init(void)
 {
        set_sample_period();
 
+#ifdef CONFIG_NO_HZ_FULL
+       if (tick_nohz_full_enabled()) {
+               pr_info("Disabling watchdog on nohz_full cores by default\n");
+               cpumask_copy(&watchdog_cpumask, housekeeping_mask);
+       } else
+               cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
+#else
+       cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
+#endif
+
        if (watchdog_enabled)
                watchdog_enable_all_cpus();
 }
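Taken together, the last hunks mean that on a NO_HZ_FULL kernel the lockup detectors are confined to the housekeeping CPUs by default, and the mask can be adjusted at run time through proc_watchdog_cpumask(). Assuming the usual sysctl wiring in kernel/sysctl.c (not part of this file), that amounts to writing a cpulist such as "0-3" to /proc/sys/kernel/watchdog_cpumask; proc_do_large_bitmap() accepts comma-separated ranges. As the comment before proc_watchdog_cpumask() points out, the mask describes where the watchdog may run, so it can include CPUs that are not online yet; the handler only trims impossible CPUs to keep the sysctl output clean.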