These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / kernel / time / tick-broadcast.c
index 7e8ca4f..f6aae79 100644 (file)
@@ -159,7 +159,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 {
        struct clock_event_device *bc = tick_broadcast_device.evtdev;
        unsigned long flags;
-       int ret;
+       int ret = 0;
 
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -221,13 +221,14 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
                         * If we kept the cpu in the broadcast mask,
                         * tell the caller to leave the per cpu device
                         * in shutdown state. The periodic interrupt
-                        * is delivered by the broadcast device.
+                        * is delivered by the broadcast device, if
+                        * the broadcast device exists and is not
+                        * hrtimer based.
                         */
-                       ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
+                       if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
+                               ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
                        break;
                default:
-                       /* Nothing to do */
-                       ret = 0;
                        break;
                }
        }
@@ -255,18 +256,32 @@ int tick_receive_broadcast(void)
 /*
  * Broadcast the event to the cpus, which are set in the mask (mangled).
  */
-static void tick_do_broadcast(struct cpumask *mask)
+static bool tick_do_broadcast(struct cpumask *mask)
 {
        int cpu = smp_processor_id();
        struct tick_device *td;
+       bool local = false;
 
        /*
         * Check, if the current cpu is in the mask
         */
        if (cpumask_test_cpu(cpu, mask)) {
+               struct clock_event_device *bc = tick_broadcast_device.evtdev;
+
                cpumask_clear_cpu(cpu, mask);
-               td = &per_cpu(tick_cpu_device, cpu);
-               td->evtdev->event_handler(td->evtdev);
+               /*
+                * We only run the local handler, if the broadcast
+                * device is not hrtimer based. Otherwise we run into
+                * a hrtimer recursion.
+                *
+                * local timer_interrupt()
+                *   local_handler()
+                *     expire_hrtimers()
+                *       bc_handler()
+                *         local_handler()
+                *           expire_hrtimers()
+                */
+               local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
        }
 
        if (!cpumask_empty(mask)) {
@@ -279,16 +294,17 @@ static void tick_do_broadcast(struct cpumask *mask)
                td = &per_cpu(tick_cpu_device, cpumask_first(mask));
                td->evtdev->broadcast(mask);
        }
+       return local;
 }
 
 /*
  * Periodic broadcast:
  * - invoke the broadcast handlers
  */
-static void tick_do_periodic_broadcast(void)
+static bool tick_do_periodic_broadcast(void)
 {
        cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
-       tick_do_broadcast(tmpmask);
+       return tick_do_broadcast(tmpmask);
 }
 
 /*
@@ -296,34 +312,33 @@ static void tick_do_periodic_broadcast(void)
  */
 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
 {
-       ktime_t next;
+       struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
+       bool bc_local;
 
        raw_spin_lock(&tick_broadcast_lock);
 
-       tick_do_periodic_broadcast();
+       /* Handle spurious interrupts gracefully */
+       if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
+               raw_spin_unlock(&tick_broadcast_lock);
+               return;
+       }
 
-       /*
-        * The device is in periodic mode. No reprogramming necessary:
-        */
-       if (dev->state == CLOCK_EVT_STATE_PERIODIC)
-               goto unlock;
+       bc_local = tick_do_periodic_broadcast();
 
-       /*
-        * Setup the next period for devices, which do not have
-        * periodic mode. We read dev->next_event first and add to it
-        * when the event already expired. clockevents_program_event()
-        * sets dev->next_event only when the event is really
-        * programmed to the device.
-        */
-       for (next = dev->next_event; ;) {
-               next = ktime_add(next, tick_period);
+       if (clockevent_state_oneshot(dev)) {
+               ktime_t next = ktime_add(dev->next_event, tick_period);
 
-               if (!clockevents_program_event(dev, next, false))
-                       goto unlock;
-               tick_do_periodic_broadcast();
+               clockevents_program_event(dev, next, true);
        }
-unlock:
        raw_spin_unlock(&tick_broadcast_lock);
+
+       /*
+        * We run the handler of the local cpu after dropping
+        * tick_broadcast_lock because the handler might deadlock when
+        * trying to switch to oneshot mode.
+        */
+       if (bc_local)
+               td->evtdev->event_handler(td->evtdev);
 }
 
 /**
@@ -366,8 +381,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode)
        case TICK_BROADCAST_ON:
                cpumask_set_cpu(cpu, tick_broadcast_on);
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
-                       if (tick_broadcast_device.mode ==
-                           TICKDEV_MODE_PERIODIC)
+                       /*
+                        * Only shutdown the cpu local device, if:
+                        *
+                        * - the broadcast device exists
+                        * - the broadcast device is not a hrtimer based one
+                        * - the broadcast device is in periodic mode to
+                        *   avoid a hiccup during switch to oneshot mode
+                        */
+                       if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
+                           tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                                clockevents_shutdown(dev);
                }
                break;
@@ -386,14 +409,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode)
                break;
        }
 
-       if (cpumask_empty(tick_broadcast_mask)) {
-               if (!bc_stopped)
-                       clockevents_shutdown(bc);
-       } else if (bc_stopped) {
-               if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
-                       tick_broadcast_start_periodic(bc);
-               else
-                       tick_broadcast_setup_oneshot(bc);
+       if (bc) {
+               if (cpumask_empty(tick_broadcast_mask)) {
+                       if (!bc_stopped)
+                               clockevents_shutdown(bc);
+               } else if (bc_stopped) {
+                       if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+                               tick_broadcast_start_periodic(bc);
+                       else
+                               tick_broadcast_setup_oneshot(bc);
+               }
        }
        raw_spin_unlock(&tick_broadcast_lock);
 }
@@ -532,23 +557,19 @@ static void tick_broadcast_set_affinity(struct clock_event_device *bc,
        irq_set_affinity(bc->irq, bc->cpumask);
 }
 
-static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
-                                   ktime_t expires, int force)
+static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
+                                    ktime_t expires)
 {
-       int ret;
-
-       if (bc->state != CLOCK_EVT_STATE_ONESHOT)
-               clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
+       if (!clockevent_state_oneshot(bc))
+               clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
 
-       ret = clockevents_program_event(bc, expires, force);
-       if (!ret)
-               tick_broadcast_set_affinity(bc, cpumask_of(cpu));
-       return ret;
+       clockevents_program_event(bc, expires, 1);
+       tick_broadcast_set_affinity(bc, cpumask_of(cpu));
 }
 
 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 {
-       clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
+       clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
 }
 
 /*
@@ -566,7 +587,7 @@ void tick_check_oneshot_broadcast_this_cpu(void)
                 * switched over, leave the device alone.
                 */
                if (td->mode == TICKDEV_MODE_ONESHOT) {
-                       clockevents_set_state(td->evtdev,
+                       clockevents_switch_state(td->evtdev,
                                              CLOCK_EVT_STATE_ONESHOT);
                }
        }
@@ -580,9 +601,9 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
        struct tick_device *td;
        ktime_t now, next_event;
        int cpu, next_cpu = 0;
+       bool bc_local;
 
        raw_spin_lock(&tick_broadcast_lock);
-again:
        dev->next_event.tv64 = KTIME_MAX;
        next_event.tv64 = KTIME_MAX;
        cpumask_clear(tmpmask);
@@ -624,7 +645,7 @@ again:
        /*
         * Wakeup the cpus which have an expired event.
         */
-       tick_do_broadcast(tmpmask);
+       bc_local = tick_do_broadcast(tmpmask);
 
        /*
         * Two reasons for reprogram:
@@ -636,15 +657,15 @@ again:
         * - There are pending events on sleeping CPUs which were not
         * in the event mask
         */
-       if (next_event.tv64 != KTIME_MAX) {
-               /*
-                * Rearm the broadcast device. If event expired,
-                * repeat the above
-                */
-               if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
-                       goto again;
-       }
+       if (next_event.tv64 != KTIME_MAX)
+               tick_broadcast_set_event(dev, next_cpu, next_event);
+
        raw_spin_unlock(&tick_broadcast_lock);
+
+       if (bc_local) {
+               td = this_cpu_ptr(&tick_cpu_device);
+               td->evtdev->event_handler(td->evtdev);
+       }
 }
 
 static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
@@ -670,77 +691,88 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
                if (dev->next_event.tv64 < bc->next_event.tv64)
                        return;
        }
-       clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
+       clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 }
 
-/**
- * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
- * @state:     The target state (enter/exit)
- *
- * The system enters/leaves a state, where affected devices might stop
- * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
- *
- * Called with interrupts disabled, so clockevents_lock is not
- * required here because the local clock event device cannot go away
- * under us.
- */
-int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 {
        struct clock_event_device *bc, *dev;
-       struct tick_device *td;
        int cpu, ret = 0;
        ktime_t now;
 
        /*
-        * Periodic mode does not care about the enter/exit of power
-        * states
+        * If there is no broadcast device, tell the caller not to go
+        * into deep idle.
         */
-       if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
-               return 0;
+       if (!tick_broadcast_device.evtdev)
+               return -EBUSY;
 
-       /*
-        * We are called with preemtion disabled from the depth of the
-        * idle code, so we can't be moved away.
-        */
-       td = this_cpu_ptr(&tick_cpu_device);
-       dev = td->evtdev;
-
-       if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
-               return 0;
+       dev = this_cpu_ptr(&tick_cpu_device)->evtdev;
 
        raw_spin_lock(&tick_broadcast_lock);
        bc = tick_broadcast_device.evtdev;
        cpu = smp_processor_id();
 
        if (state == TICK_BROADCAST_ENTER) {
+               /*
+                * If the current CPU owns the hrtimer broadcast
+                * mechanism, it cannot go deep idle and we do not add
+                * the CPU to the broadcast mask. We don't have to go
+                * through the EXIT path as the local timer is not
+                * shutdown.
+                */
+               ret = broadcast_needs_cpu(bc, cpu);
+               if (ret)
+                       goto out;
+
+               /*
+                * If the broadcast device is in periodic mode, we
+                * return.
+                */
+               if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
+                       /* If it is a hrtimer based broadcast, return busy */
+                       if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
+                               ret = -EBUSY;
+                       goto out;
+               }
+
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
+
+                       /* Conditionally shut down the local timer. */
                        broadcast_shutdown_local(bc, dev);
+
                        /*
                         * We only reprogram the broadcast timer if we
                         * did not mark ourself in the force mask and
                         * if the cpu local event is earlier than the
                         * broadcast event. If the current CPU is in
                         * the force mask, then we are going to be
-                        * woken by the IPI right away.
+                        * woken by the IPI right away; we return
+                        * busy, so the CPU does not try to go deep
+                        * idle.
                         */
-                       if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
-                           dev->next_event.tv64 < bc->next_event.tv64)
-                               tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
+                       if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
+                               ret = -EBUSY;
+                       } else if (dev->next_event.tv64 < bc->next_event.tv64) {
+                               tick_broadcast_set_event(bc, cpu, dev->next_event);
+                               /*
+                                * In case of hrtimer broadcasts the
+                                * programming might have moved the
+                                * timer to this cpu. If yes, remove
+                                * us from the broadcast mask and
+                                * return busy.
+                                */
+                               ret = broadcast_needs_cpu(bc, cpu);
+                               if (ret) {
+                                       cpumask_clear_cpu(cpu,
+                                               tick_broadcast_oneshot_mask);
+                               }
+                       }
                }
-               /*
-                * If the current CPU owns the hrtimer broadcast
-                * mechanism, it cannot go deep idle and we remove the
-                * CPU from the broadcast mask. We don't have to go
-                * through the EXIT path as the local timer is not
-                * shutdown.
-                */
-               ret = broadcast_needs_cpu(bc, cpu);
-               if (ret)
-                       cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        } else {
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
-                       clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
+                       clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
                        /*
                         * The cpu which was handling the broadcast
                         * timer marked this cpu in the broadcast
@@ -807,7 +839,6 @@ out:
        raw_spin_unlock(&tick_broadcast_lock);
        return ret;
 }
-EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
 
 /*
  * Reset the one shot broadcast for a cpu
@@ -842,7 +873,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 
        /* Set it up only once ! */
        if (bc->event_handler != tick_handle_oneshot_broadcast) {
-               int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC;
+               int was_periodic = clockevent_state_periodic(bc);
 
                bc->event_handler = tick_handle_oneshot_broadcast;
 
@@ -858,10 +889,10 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
                           tick_broadcast_oneshot_mask, tmpmask);
 
                if (was_periodic && !cpumask_empty(tmpmask)) {
-                       clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
+                       clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
                        tick_broadcast_init_next_event(tmpmask,
                                                       tick_next_period);
-                       tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
+                       tick_broadcast_set_event(bc, cpu, tick_next_period);
                } else
                        bc->next_event.tv64 = KTIME_MAX;
        } else {
@@ -949,6 +980,16 @@ bool tick_broadcast_oneshot_available(void)
        return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
 }
 
+#else
+int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+{
+       struct clock_event_device *bc = tick_broadcast_device.evtdev;
+
+       if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
+               return -EBUSY;
+
+       return 0;
+}
 #endif
 
 void __init tick_broadcast_init(void)