These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
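The core of this update is visible in the first hunks below: the hand-rolled done->waiter task pointer and its TASK_UNINTERRUPTIBLE wait loop are replaced by a struct completion, so waiters sleep in wait_for_completion() and the last stopper to finish signals them with complete(). A minimal sketch of that pattern, not part of the patch and using made-up demo_* names:

#include <linux/atomic.h>
#include <linux/completion.h>

struct demo_done {				/* hypothetical stand-in for cpu_stop_done */
	atomic_t		nr_todo;
	struct completion	completion;
};

static void demo_init(struct demo_done *done, unsigned int nr_todo)
{
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}

static void demo_signal(struct demo_done *done)
{
	/* the last caller to drop nr_todo to zero wakes the waiter */
	if (atomic_dec_and_test(&done->nr_todo))
		complete(&done->completion);
}

static void demo_wait(struct demo_done *done)
{
	wait_for_completion(&done->completion);
}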
[kvmfornfv.git] kernel/kernel/stop_machine.c
index 1af29ad..f84d3b4 100644
@@ -30,18 +30,21 @@ struct cpu_stop_done {
        atomic_t                nr_todo;        /* nr left to execute */
        bool                    executed;       /* actually executed? */
        int                     ret;            /* collected return value */
-       struct task_struct      *waiter;        /* woken when nr_todo reaches 0 */
+       struct completion       completion;     /* fired if nr_todo reaches 0 */
 };
 
 /* the actual stopper, one per every possible cpu, enabled on online cpus */
 struct cpu_stopper {
+       struct task_struct      *thread;
+
        raw_spinlock_t          lock;
        bool                    enabled;        /* is this stopper enabled? */
        struct list_head        works;          /* list of pending works */
+
+       struct cpu_stop_work    stop_work;      /* for stop_cpus */
 };
 
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
-static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
 static bool stop_machine_initialized = false;
 
 /*
@@ -56,7 +59,7 @@ static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
 {
        memset(done, 0, sizeof(*done));
        atomic_set(&done->nr_todo, nr_todo);
-       done->waiter = current;
+       init_completion(&done->completion);
 }
 
 /* signal completion unless @done is NULL */
@@ -65,48 +68,32 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
        if (done) {
                if (executed)
                        done->executed = true;
-               if (atomic_dec_and_test(&done->nr_todo)) {
-                       wake_up_process(done->waiter);
-                       done->waiter = NULL;
-               }
+               if (atomic_dec_and_test(&done->nr_todo))
+                       complete(&done->completion);
        }
 }
 
+static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
+                                       struct cpu_stop_work *work)
+{
+       list_add_tail(&work->list, &stopper->works);
+       wake_up_process(stopper->thread);
+}
+
 /* queue @work to @stopper.  if offline, @work is completed immediately */
 static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 {
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
-       struct task_struct *p = per_cpu(cpu_stopper_task, cpu);
-
        unsigned long flags;
 
        raw_spin_lock_irqsave(&stopper->lock, flags);
-
-       if (stopper->enabled) {
-               list_add_tail(&work->list, &stopper->works);
-               wake_up_process(p);
-       } else
+       if (stopper->enabled)
+               __cpu_stop_queue_work(stopper, work);
+       else
                cpu_stop_signal_done(work->done, false);
-
        raw_spin_unlock_irqrestore(&stopper->lock, flags);
 }
 
-static void wait_for_stop_done(struct cpu_stop_done *done)
-{
-       set_current_state(TASK_UNINTERRUPTIBLE);
-       while (atomic_read(&done->nr_todo)) {
-               schedule();
-               set_current_state(TASK_UNINTERRUPTIBLE);
-       }
-       /*
-        * We need to wait until cpu_stop_signal_done() has cleared
-        * done->waiter.
-        */
-       while (done->waiter)
-               cpu_relax();
-       set_current_state(TASK_RUNNING);
-}
-
 /**
  * stop_one_cpu - stop a cpu
  * @cpu: cpu to stop
@@ -138,7 +125,7 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
 
        cpu_stop_init_done(&done, 1);
        cpu_stop_queue_work(cpu, &work);
-       wait_for_stop_done(&done);
+       wait_for_completion(&done.completion);
        return done.executed ? done.ret : -ENOENT;
 }
 
@@ -157,7 +144,7 @@ enum multi_stop_state {
 };
 
 struct multi_stop_data {
-       int                     (*fn)(void *);
+       cpu_stop_fn_t           fn;
        void                    *data;
        /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
        unsigned int            num_threads;
@@ -229,25 +216,31 @@ static int multi_cpu_stop(void *data)
        return err;
 }
 
-struct irq_cpu_stop_queue_work_info {
-       int cpu1;
-       int cpu2;
-       struct cpu_stop_work *work1;
-       struct cpu_stop_work *work2;
-};
-
-/*
- * This function is always run with irqs and preemption disabled.
- * This guarantees that both work1 and work2 get queued, before
- * our local migrate thread gets the chance to preempt us.
- */
-static void irq_cpu_stop_queue_work(void *arg)
+static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
+                                   int cpu2, struct cpu_stop_work *work2)
 {
-       struct irq_cpu_stop_queue_work_info *info = arg;
-       cpu_stop_queue_work(info->cpu1, info->work1);
-       cpu_stop_queue_work(info->cpu2, info->work2);
-}
+       struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
+       struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
+       int err;
+
+       lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
+       raw_spin_lock_irq(&stopper1->lock);
+       raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
+
+       err = -ENOENT;
+       if (!stopper1->enabled || !stopper2->enabled)
+               goto unlock;
+
+       err = 0;
+       __cpu_stop_queue_work(stopper1, work1);
+       __cpu_stop_queue_work(stopper2, work2);
+unlock:
+       raw_spin_unlock(&stopper2->lock);
+       raw_spin_unlock_irq(&stopper1->lock);
+       lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
 
+       return err;
+}
 /**
  * stop_two_cpus - stops two cpus
  * @cpu1: the cpu to stop
@@ -263,7 +256,6 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 {
        struct cpu_stop_done done;
        struct cpu_stop_work work1, work2;
-       struct irq_cpu_stop_queue_work_info call_args;
        struct multi_stop_data msdata;
 
        preempt_disable_nort();
@@ -280,42 +272,19 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
                .done = &done
        };
 
-       call_args = (struct irq_cpu_stop_queue_work_info){
-               .cpu1 = cpu1,
-               .cpu2 = cpu2,
-               .work1 = &work1,
-               .work2 = &work2,
-       };
-
        cpu_stop_init_done(&done, 2);
        set_state(&msdata, MULTI_STOP_PREPARE);
 
-       /*
-        * If we observe both CPUs active we know _cpu_down() cannot yet have
-        * queued its stop_machine works and therefore ours will get executed
-        * first. Or its not either one of our CPUs that's getting unplugged,
-        * in which case we don't care.
-        *
-        * This relies on the stopper workqueues to be FIFO.
-        */
-       if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
+       if (cpu1 > cpu2)
+               swap(cpu1, cpu2);
+       if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) {
                preempt_enable_nort();
                return -ENOENT;
        }
 
-       lg_local_lock(&stop_cpus_lock);
-       /*
-        * Queuing needs to be done by the lowest numbered CPU, to ensure
-        * that works are always queued in the same order on every CPU.
-        * This prevents deadlocks.
-        */
-       smp_call_function_single(min(cpu1, cpu2),
-                                &irq_cpu_stop_queue_work,
-                                &call_args, 1);
-       lg_local_unlock(&stop_cpus_lock);
        preempt_enable_nort();
 
-       wait_for_stop_done(&done);
+       wait_for_completion(&done.completion);
 
        return done.executed ? done.ret : -ENOENT;
 }
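The smp_call_function_single() trick removed above is replaced by cpu_stop_queue_two_works(), which takes both stopper locks directly and relies on a fixed ordering: stop_two_cpus() swaps the pair so the lower-numbered CPU is always locked and queued first, keeping the per-CPU work lists consistent for concurrent callers, and an offline stopper now simply yields -ENOENT. A hedged caller-side sketch, not from the patch, with the made-up names demo_stop_pair and pair_fn:

#include <linux/kernel.h>
#include <linux/stop_machine.h>

static int pair_fn(void *arg)
{
	/* runs via multi_cpu_stop() with both CPUs stopped */
	return 0;
}

static int demo_stop_pair(unsigned int cpu1, unsigned int cpu2)
{
	/*
	 * stop_two_cpus() now performs this swap itself; it is repeated
	 * here only to spell out the ordering rule the queueing depends on.
	 */
	if (cpu1 > cpu2)
		swap(cpu1, cpu2);
	return stop_two_cpus(cpu1, cpu2, pair_fn, NULL);
}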
@@ -343,7 +312,6 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 
 /* static data for stop_cpus */
 static DEFINE_MUTEX(stop_cpus_mutex);
-static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
 
 static void queue_stop_cpus_work(const struct cpumask *cpumask,
                                 cpu_stop_fn_t fn, void *arg,
@@ -352,14 +320,6 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
        struct cpu_stop_work *work;
        unsigned int cpu;
 
-       /* initialize works and done */
-       for_each_cpu(cpu, cpumask) {
-               work = &per_cpu(stop_cpus_work, cpu);
-               work->fn = fn;
-               work->arg = arg;
-               work->done = done;
-       }
-
        /*
         * Make sure that all work is queued on all cpus before
         * any of the cpus can execute it.
@@ -368,8 +328,14 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
                lg_global_lock(&stop_cpus_lock);
        else
                lg_global_trylock_relax(&stop_cpus_lock);
-       for_each_cpu(cpu, cpumask)
-               cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
+
+       for_each_cpu(cpu, cpumask) {
+               work = &per_cpu(cpu_stopper.stop_work, cpu);
+               work->fn = fn;
+               work->arg = arg;
+               work->done = done;
+               cpu_stop_queue_work(cpu, work);
+       }
        lg_global_unlock(&stop_cpus_lock);
 }
 
@@ -380,7 +346,7 @@ static int __stop_cpus(const struct cpumask *cpumask,
 
        cpu_stop_init_done(&done, cpumask_weight(cpumask));
        queue_stop_cpus_work(cpumask, fn, arg, &done, false);
-       wait_for_stop_done(&done);
+       wait_for_completion(&done.completion);
        return done.executed ? done.ret : -ENOENT;
 }
 
@@ -511,56 +477,52 @@ repeat:
                          kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
                                          ksym_buf), arg);
 
-               /*
-                * Make sure that the wakeup and setting done->waiter
-                * to NULL is atomic.
-                */
-               local_irq_disable();
                cpu_stop_signal_done(done, true);
-               local_irq_enable();
                goto repeat;
        }
 }
 
+void stop_machine_park(int cpu)
+{
+       struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+       /*
+        * Lockless. cpu_stopper_thread() will take stopper->lock and flush
+        * the pending works before it parks, until then it is fine to queue
+        * the new works.
+        */
+       stopper->enabled = false;
+       kthread_park(stopper->thread);
+}
+
 extern void sched_set_stop_task(int cpu, struct task_struct *stop);
 
 static void cpu_stop_create(unsigned int cpu)
 {
-       sched_set_stop_task(cpu, per_cpu(cpu_stopper_task, cpu));
+       sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
 }
 
 static void cpu_stop_park(unsigned int cpu)
 {
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
-       struct cpu_stop_work *work;
-       unsigned long flags;
 
-       /* drain remaining works */
-       raw_spin_lock_irqsave(&stopper->lock, flags);
-       list_for_each_entry(work, &stopper->works, list)
-               cpu_stop_signal_done(work->done, false);
-       stopper->enabled = false;
-       raw_spin_unlock_irqrestore(&stopper->lock, flags);
+       WARN_ON(!list_empty(&stopper->works));
 }
 
-static void cpu_stop_unpark(unsigned int cpu)
+void stop_machine_unpark(int cpu)
 {
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
 
-       raw_spin_lock_irq(&stopper->lock);
        stopper->enabled = true;
-       raw_spin_unlock_irq(&stopper->lock);
+       kthread_unpark(stopper->thread);
 }
 
 static struct smp_hotplug_thread cpu_stop_threads = {
-       .store                  = &cpu_stopper_task,
+       .store                  = &cpu_stopper.thread,
        .thread_should_run      = cpu_stop_should_run,
        .thread_fn              = cpu_stopper_thread,
        .thread_comm            = "migration/%u",
        .create                 = cpu_stop_create,
-       .setup                  = cpu_stop_unpark,
        .park                   = cpu_stop_park,
-       .pre_unpark             = cpu_stop_unpark,
        .selfparking            = true,
 };
 
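With this hunk the stopper threads are parked and unparked explicitly: stop_machine_park() clears stopper->enabled before kthread_park(), so no new work can be queued while the thread drains what it already has, and stop_machine_unpark() re-enables the stopper before waking it. The smpboot .setup/.pre_unpark hooks go away because the hotplug core now makes these calls itself. A sketch of the intended pairing, not part of the patch, assuming the declarations are exported via <linux/stop_machine.h> as in mainline:

#include <linux/stop_machine.h>

static void demo_cpu_going_down(int cpu)
{
	/* refuse new work, then let migration/N drain its list and park */
	stop_machine_park(cpu);
}

static void demo_cpu_came_up(int cpu)
{
	/* mark the stopper usable again before unparking the thread */
	stop_machine_unpark(cpu);
}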
@@ -578,14 +540,15 @@ static int __init cpu_stop_init(void)
        lg_lock_init(&stop_cpus_lock, "stop_cpus_lock");
 
        BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
+       stop_machine_unpark(raw_smp_processor_id());
        stop_machine_initialized = true;
        return 0;
 }
 early_initcall(cpu_stop_init);
 
-#ifdef CONFIG_STOP_MACHINE
+#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU)
 
-int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
+static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 {
        struct multi_stop_data msdata = {
                .fn = fn,
@@ -618,7 +581,7 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
        return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
 }
 
-int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
+int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 {
        int ret;
 
@@ -652,7 +615,7 @@ EXPORT_SYMBOL_GPL(stop_machine);
  * 0 if all executions of @fn returned 0, any non zero return value if any
  * returned non zero.
  */
-int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
+int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
                                  const struct cpumask *cpus)
 {
        struct multi_stop_data msdata = { .fn = fn, .data = data,
@@ -676,11 +639,11 @@ int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
        ret = multi_cpu_stop(&msdata);
 
        /* Busy wait for completion. */
-       while (atomic_read(&done.nr_todo))
+       while (!completion_done(&done.completion))
                cpu_relax();
 
        mutex_unlock(&stop_cpus_mutex);
        return ret ?: done.ret;
 }
 
-#endif /* CONFIG_STOP_MACHINE */
+#endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
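The final hunks switch the remaining wait sites over as well: sleeping paths use wait_for_completion(), while stop_machine_from_inactive_cpu(), which cannot sleep, now polls completion_done() instead of the raw nr_todo counter, and stop_machine()/__stop_machine() adopt the cpu_stop_fn_t typedef (the same int (*)(void *) signature the old declarations spelled out). A hedged sketch of a stop_machine() caller under the new prototype, using the made-up name demo_patch_cb:

#include <linux/errno.h>
#include <linux/stop_machine.h>

static int demo_patch_cb(void *arg)
{
	/* runs on one CPU while all others spin with interrupts disabled */
	return arg ? 0 : -EINVAL;
}

static int demo_apply(void *payload)
{
	/* NULL cpumask: run demo_patch_cb() on any one online CPU */
	return stop_machine(demo_patch_cb, payload, NULL);
}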