kernel/kernel/cpu.c

   1 /* CPU control.
   2  * (C) 2001, 2002, 2003, 2004 Rusty Russell
   3  *
   4  * This code is licenced under the GPL.
   5  */
   6 #include <linux/proc_fs.h>
   7 #include <linux/smp.h>
   8 #include <linux/init.h>
   9 #include <linux/notifier.h>
  10 #include <linux/sched.h>
  11 #include <linux/unistd.h>
  12 #include <linux/cpu.h>
  13 #include <linux/oom.h>
  14 #include <linux/rcupdate.h>
  15 #include <linux/export.h>
  16 #include <linux/bug.h>
  17 #include <linux/kthread.h>
  18 #include <linux/stop_machine.h>
  19 #include <linux/mutex.h>
  20 #include <linux/gfp.h>
  21 #include <linux/suspend.h>
  22 #include <linux/lockdep.h>
  23 #include <linux/tick.h>
  24 #include <linux/irq.h>
  25 #include <trace/events/power.h>
  26
  27 #include "smpboot.h"
  28
  29 #ifdef CONFIG_SMP
/*
 * Serializes the updates to cpu_online_mask, cpu_present_mask.
 * Acquired and released via cpu_maps_update_begin()/cpu_maps_update_done().
 */
static DEFINE_MUTEX(cpu_add_remove_lock);
  32
  33 /*
  34  * The following two APIs (cpu_maps_update_begin/done) must be used when
  35  * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
  36  * The APIs cpu_notifier_register_begin/done() must be used to protect CPU
  37  * hotplug callback (un)registration performed using __register_cpu_notifier()
  38  * or __unregister_cpu_notifier().
  39  */
/*
 * cpu_maps_update_begin - take cpu_add_remove_lock.
 *
 * Pairs with cpu_maps_update_done(). Acquires a mutex, so it may block.
 */
void cpu_maps_update_begin(void)
{
        mutex_lock(&cpu_add_remove_lock);
}
/*
 * NOTE(review): the symbol exported here is cpu_notifier_register_begin,
 * not cpu_maps_update_begin - presumably a header aliases the former to
 * the latter; confirm against <linux/cpu.h>.
 */
EXPORT_SYMBOL(cpu_notifier_register_begin);
  45
/*
 * cpu_maps_update_done - release cpu_add_remove_lock.
 *
 * Counterpart of cpu_maps_update_begin().
 */
void cpu_maps_update_done(void)
{
        mutex_unlock(&cpu_add_remove_lock);
}
/*
 * NOTE(review): the symbol exported here is cpu_notifier_register_done,
 * not cpu_maps_update_done - presumably a header aliases the former to
 * the latter; confirm against <linux/cpu.h>.
 */
EXPORT_SYMBOL(cpu_notifier_register_done);
  51
/* Notifier chain called for CPU hotplug events, see __cpu_notify(). */
static RAW_NOTIFIER_HEAD(cpu_chain);

/*
 * While non-zero, cpu_up and cpu_down will return -EBUSY and do nothing.
 * This is a nesting count rather than a flag: it is incremented by
 * cpu_hotplug_disable()/disable_nonboot_cpus() and decremented by
 * cpu_hotplug_enable()/enable_nonboot_cpus().
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;
  58
  59 #ifdef CONFIG_HOTPLUG_CPU
  60
/*
 * Reader/writer state shared by get_online_cpus()/put_online_cpus()
 * (readers) and cpu_hotplug_begin()/cpu_hotplug_done() (the writer).
 */
static struct {
        /* task between cpu_hotplug_begin() and cpu_hotplug_done(), else NULL */
        struct task_struct *active_writer;
        /* wait queue to wake up the active_writer */
        wait_queue_head_t wq;
        /*
         * verifies that no writer will get active while readers are active.
         * Held by the writer from cpu_hotplug_begin() to cpu_hotplug_done(),
         * so it also blocks the new readers during an ongoing cpu hotplug
         * operation.
         */
        struct mutex lock;
        /*
         * Number of readers currently between get_online_cpus() and
         * put_online_cpus(); the writer proceeds only when it reads zero.
         */
        atomic_t refcount;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
        /* lockdep map used by the cpuhp_lock_*() annotations */
        struct lockdep_map dep_map;
#endif
} cpu_hotplug = {
        .active_writer = NULL,
        .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
        .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        .dep_map = {.name = "cpu_hotplug.lock" },
#endif
};
  84
/*
 * Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end().
 * All of them operate on cpu_hotplug.dep_map: readers use the *_read()
 * variant, the writer uses cpuhp_lock_acquire(), and both sides drop the
 * annotation with cpuhp_lock_release().
 */
#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire_tryread() \
                                  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
  91
/**
 * hotplug_pcp  - per cpu hotplug descriptor
 * @unplug:     set when pin_current_cpu() needs to sync tasks
 * @sync_tsk:   the task that waits for tasks to finish pinned sections
 * @refcount:   counter of tasks in pinned sections
 * @grab_lock:  set when the tasks entering pinned sections should wait
 * @synced:     notifier for @sync_tsk to tell cpu_down it's finished
 * @unplug_wait: completed by __cpu_unplug_wait() to let @sync_tsk start
 *              waiting for pinned tasks
 * @lock:       PREEMPT_RT_FULL replacement for @mutex (see below)
 * @mutex:      the mutex to make tasks wait (used when @grab_lock is true)
 * @mutex_init: zero if the mutex hasn't been initialized yet.
 *
 * Although @unplug and @sync_tsk may point to the same task, the @unplug
 * is used as a flag and still exists after @sync_tsk has exited and
 * @sync_tsk set to NULL.
 */
struct hotplug_pcp {
        struct task_struct *unplug;
        struct task_struct *sync_tsk;
        int refcount;
        int grab_lock;
        struct completion synced;
        struct completion unplug_wait;
#ifdef CONFIG_PREEMPT_RT_FULL
        /*
         * Note, on PREEMPT_RT, the hotplug lock must save the state of
         * the task, otherwise the mutex will cause the task to fail
         * to sleep when required. (Because it's called from migrate_disable())
         *
         * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
         * state.
         */
        spinlock_t lock;
#else
        struct mutex mutex;
#endif
        int mutex_init;
};
 128
/*
 * hotplug_lock()/hotplug_unlock() take and release the per-cpu hotplug
 * lock of @hp: the spinlock_t 'lock' member on PREEMPT_RT_FULL, the
 * 'mutex' member otherwise.
 */
#ifdef CONFIG_PREEMPT_RT_FULL
# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock)
# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock)
#else
# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
#endif

/* One hotplug descriptor per CPU. */
static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
 138
/**
 * pin_current_cpu - Prevent the current cpu from being unplugged
 *
 * Lightweight version of get_online_cpus() to prevent cpu from being
 * unplugged when code runs in a migration disabled region.
 *
 * Must be called with preemption disabled (preempt_count = 1)!
 *
 * Returns with preemption disabled and this CPU's hotplug_pcp refcount
 * incremented; every path that re-enables preemption disables it again
 * before jumping back to the retry label.
 */
void pin_current_cpu(void)
{
        struct hotplug_pcp *hp;
        int force = 0;

retry:
        /* Re-read every time: we may have changed CPU while preemptible. */
        hp = this_cpu_ptr(&hotplug_pcp);

        /*
         * Take the pin right away when no unplug is pending, when the CPU
         * is already pinned, when forced, when the caller is nested deeper
         * than one preempt-disable level, or when we are the unplug task.
         */
        if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
            hp->unplug == current) {
                hp->refcount++;
                return;
        }
        if (hp->grab_lock) {
                /*
                 * cpu_unplug_sync() holds the hotplug lock while grab_lock
                 * is set; block on it until cpu_unplug_done() releases it,
                 * then drop it again and retry.
                 */
                preempt_enable();
                hotplug_lock(hp);
                hotplug_unlock(hp);
        } else {
                preempt_enable();
                /*
                 * Try to push this task off of this CPU.
                 * NOTE(review): a zero return from migrate_me() is treated
                 * as "did not move" here - confirm against its definition.
                 */
                if (!migrate_me()) {
                        preempt_disable();
                        hp = this_cpu_ptr(&hotplug_pcp);
                        if (!hp->grab_lock) {
                                /*
                                 * Just let it continue it's already pinned
                                 * or about to sleep.
                                 */
                                force = 1;
                                goto retry;
                        }
                        preempt_enable();
                }
        }
        preempt_disable();
        goto retry;
}
 186
 187 /**
 188  * unpin_current_cpu - Allow unplug of current cpu
 189  *
 190  * Must be called with preemption or interrupts disabled!
 191  */
 192 void unpin_current_cpu(void)
 193 {
 194         struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
 195
 196         WARN_ON(hp->refcount <= 0);
 197
 198         /* This is safe. sync_unplug_thread is pinned to this cpu */
 199         if (!--hp->refcount && hp->unplug && hp->unplug != current)
 200                 wake_up_process(hp->unplug);
 201 }
 202
 203 static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
 204 {
 205         set_current_state(TASK_UNINTERRUPTIBLE);
 206         while (hp->refcount) {
 207                 schedule_preempt_disabled();
 208                 set_current_state(TASK_UNINTERRUPTIBLE);
 209         }
 210 }
 211
/*
 * Body of the per-cpu "sync_unplug/%d" kthread created by
 * cpu_unplug_begin(); @data is the CPU's struct hotplug_pcp.
 *
 * Phases, in order:
 *  1. wait for unplug_wait (completed by __cpu_unplug_wait()),
 *  2. publish itself in hp->unplug, wait for the pin count to reach zero
 *     and complete hp->synced,
 *  3. sleep until grab_lock is set or the thread is asked to stop,
 *  4. unless stopping, wait for the pin count again and complete
 *     hp->synced a second time,
 *  5. sleep until kthread_stop(), then move itself off the CPU.
 *
 * Always returns 0.
 */
static int sync_unplug_thread(void *data)
{
        struct hotplug_pcp *hp = data;

        wait_for_completion(&hp->unplug_wait);
        preempt_disable();
        /* From here on pin_current_cpu() sees an unplug in progress. */
        hp->unplug = current;
        wait_for_pinned_cpus(hp);

        /*
         * This thread will synchronize the cpu_down() with threads
         * that have pinned the CPU. When the pinned CPU count reaches
         * zero, we inform the cpu_down code to continue to the next step.
         */
        set_current_state(TASK_UNINTERRUPTIBLE);
        preempt_enable();
        complete(&hp->synced);

        /*
         * If all succeeds, the next step will need tasks to wait till
         * the CPU is offline before continuing. To do this, the grab_lock
         * is set and tasks going into pin_current_cpu() will block on the
         * mutex. But we still need to wait for those that are already in
         * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
         * will kick this thread out.
         */
        while (!hp->grab_lock && !kthread_should_stop()) {
                schedule();
                set_current_state(TASK_UNINTERRUPTIBLE);
        }

        /* Make sure grab_lock is seen before we see a stale completion */
        smp_mb();

        /*
         * Now just before cpu_down() enters stop machine, we need to make
         * sure all tasks that are in pinned CPU sections are out, and new
         * tasks will now grab the lock, keeping them from entering pinned
         * CPU sections.
         */
        if (!kthread_should_stop()) {
                preempt_disable();
                wait_for_pinned_cpus(hp);
                preempt_enable();
                complete(&hp->synced);
        }

        /* Nothing left to do but wait for kthread_stop(). */
        set_current_state(TASK_UNINTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_UNINTERRUPTIBLE);
        }
        set_current_state(TASK_RUNNING);

        /*
         * Force this thread off this CPU as it's going down and
         * we don't want any more work on this CPU.
         */
        current->flags &= ~PF_NO_SETAFFINITY;
        set_cpus_allowed_ptr(current, cpu_present_mask);
        migrate_me();
        return 0;
}
 275
/*
 * Wake the sync thread of @hp and block until it completes hp->synced.
 * Called from cpu_unplug_sync() after grab_lock has been set.
 */
static void __cpu_unplug_sync(struct hotplug_pcp *hp)
{
        wake_up_process(hp->sync_tsk);
        wait_for_completion(&hp->synced);
}
 281
/*
 * Release @cpu's sync thread from its initial wait on unplug_wait and
 * block until it reports, through hp->synced, that no task has the CPU
 * pinned any more.
 */
static void __cpu_unplug_wait(unsigned int cpu)
{
        struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);

        complete(&hp->unplug_wait);
        wait_for_completion(&hp->synced);
}
 289
 290 /*
 291  * Start the sync_unplug_thread on the target cpu and wait for it to
 292  * complete.
 293  */
 294 static int cpu_unplug_begin(unsigned int cpu)
 295 {
 296         struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
 297         int err;
 298
 299         /* Protected by cpu_hotplug.lock */
 300         if (!hp->mutex_init) {
 301 #ifdef CONFIG_PREEMPT_RT_FULL
 302                 spin_lock_init(&hp->lock);
 303 #else
 304                 mutex_init(&hp->mutex);
 305 #endif
 306                 hp->mutex_init = 1;
 307         }
 308
 309         /* Inform the scheduler to migrate tasks off this CPU */
 310         tell_sched_cpu_down_begin(cpu);
 311
 312         init_completion(&hp->synced);
 313         init_completion(&hp->unplug_wait);
 314
 315         hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
 316         if (IS_ERR(hp->sync_tsk)) {
 317                 err = PTR_ERR(hp->sync_tsk);
 318                 hp->sync_tsk = NULL;
 319                 return err;
 320         }
 321         kthread_bind(hp->sync_tsk, cpu);
 322
 323         /*
 324          * Wait for tasks to get out of the pinned sections,
 325          * it's still OK if new tasks enter. Some CPU notifiers will
 326          * wait for tasks that are going to enter these sections and
 327          * we must not have them block.
 328          */
 329         wake_up_process(hp->sync_tsk);
 330         return 0;
 331 }
 332
/*
 * Second synchronization step of a cpu-down: take the per-cpu hotplug
 * lock, set grab_lock so that new callers of pin_current_cpu() block on
 * that lock, wait for the sync thread to report that all pinned sections
 * are gone, and finally stop the sync thread.
 *
 * The hotplug lock is still held on return; cpu_unplug_done() drops it.
 */
static void cpu_unplug_sync(unsigned int cpu)
{
        struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);

        init_completion(&hp->synced);
        /* The completion needs to be initialized before setting grab_lock */
        smp_wmb();

        /* Grab the mutex before setting grab_lock */
        hotplug_lock(hp);
        hp->grab_lock = 1;

        /*
         * The CPU notifiers have been completed.
         * Wait for tasks to get out of pinned CPU sections and have new
         * tasks block until the CPU is completely down.
         */
        __cpu_unplug_sync(hp);

        /* All done with the sync thread */
        kthread_stop(hp->sync_tsk);
        hp->sync_tsk = NULL;
}
 356
 357 static void cpu_unplug_done(unsigned int cpu)
 358 {
 359         struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
 360
 361         hp->unplug = NULL;
 362         /* Let all tasks know cpu unplug is finished before cleaning up */
 363         smp_wmb();
 364
 365         if (hp->sync_tsk)
 366                 kthread_stop(hp->sync_tsk);
 367
 368         if (hp->grab_lock) {
 369                 hotplug_unlock(hp);
 370                 /* protected by cpu_hotplug.lock */
 371                 hp->grab_lock = 0;
 372         }
 373         tell_sched_cpu_down_done(cpu);
 374 }
 375
 376 void get_online_cpus(void)
 377 {
 378         might_sleep();
 379         if (cpu_hotplug.active_writer == current)
 380                 return;
 381         cpuhp_lock_acquire_read();
 382         mutex_lock(&cpu_hotplug.lock);
 383         atomic_inc(&cpu_hotplug.refcount);
 384         mutex_unlock(&cpu_hotplug.lock);
 385 }
 386 EXPORT_SYMBOL_GPL(get_online_cpus);
 387
 388 void put_online_cpus(void)
 389 {
 390         int refcount;
 391
 392         if (cpu_hotplug.active_writer == current)
 393                 return;
 394
 395         refcount = atomic_dec_return(&cpu_hotplug.refcount);
 396         if (WARN_ON(refcount < 0)) /* try to fix things up */
 397                 atomic_inc(&cpu_hotplug.refcount);
 398
 399         if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
 400                 wake_up(&cpu_hotplug.wq);
 401
 402         cpuhp_lock_release();
 403
 404 }
 405 EXPORT_SYMBOL_GPL(put_online_cpus);
 406
/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock
 *
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock, finds the refcount
 *   non-zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
 * get_online_cpus() is not an API which is called all that often.
 *
 * On return the caller is recorded as active_writer and holds
 * cpu_hotplug.lock; cpu_hotplug_done() undoes both.
 */
void cpu_hotplug_begin(void)
{
        DEFINE_WAIT(wait);

        cpu_hotplug.active_writer = current;
        cpuhp_lock_acquire();

        for (;;) {
                mutex_lock(&cpu_hotplug.lock);
                /* Queue ourselves before checking, so a wakeup is not lost. */
                prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
                if (likely(!atomic_read(&cpu_hotplug.refcount)))
                                break;
                /* Readers still active: let them in/out and sleep. */
                mutex_unlock(&cpu_hotplug.lock);
                schedule();
        }
        finish_wait(&cpu_hotplug.wq, &wait);
}
 446
/*
 * End a hotplug write-side section started by cpu_hotplug_begin():
 * clear active_writer, release cpu_hotplug.lock and drop the lockdep
 * annotation.
 */
void cpu_hotplug_done(void)
{
        cpu_hotplug.active_writer = NULL;
        mutex_unlock(&cpu_hotplug.lock);
        cpuhp_lock_release();
}
 453
/*
 * Wait for currently running CPU hotplug operations to complete (if any) and
 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 * hotplug path before performing hotplug operations. So acquiring that lock
 * guarantees mutual exclusion from any currently running hotplug operations.
 *
 * Calls nest: each one increments 'cpu_hotplug_disabled' and must be
 * balanced by a cpu_hotplug_enable().
 */
void cpu_hotplug_disable(void)
{
        cpu_maps_update_begin();
        cpu_hotplug_disabled++;
        cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
 468
/*
 * Undo one cpu_hotplug_disable() by decrementing 'cpu_hotplug_disabled'
 * under cpu_add_remove_lock. Warns if the count would become negative,
 * i.e. on an unbalanced call.
 */
void cpu_hotplug_enable(void)
{
        cpu_maps_update_begin();
        WARN_ON(--cpu_hotplug_disabled < 0);
        cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 476 #endif  /* CONFIG_HOTPLUG_CPU */
 477
 478 /* Need to know about CPUs going up/down? */
 479 int register_cpu_notifier(struct notifier_block *nb)
 480 {
 481         int ret;
 482         cpu_maps_update_begin();
 483         ret = raw_notifier_chain_register(&cpu_chain, nb);
 484         cpu_maps_update_done();
 485         return ret;
 486 }
 487
/*
 * Lockless variant of register_cpu_notifier(): adds @nb to the CPU
 * notifier chain without taking cpu_add_remove_lock itself. Per the
 * comment at the top of this file, callers are expected to wrap it in
 * cpu_notifier_register_begin()/done().
 */
int __register_cpu_notifier(struct notifier_block *nb)
{
        return raw_notifier_chain_register(&cpu_chain, nb);
}
 492
 493 static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
 494                         int *nr_calls)
 495 {
 496         int ret;
 497
 498         ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
 499                                         nr_calls);
 500
 501         return notifier_to_errno(ret);
 502 }
 503
/*
 * Convenience wrapper around __cpu_notify(): passes -1 as the call limit
 * and does not ask for the number of callbacks invoked.
 */
static int cpu_notify(unsigned long val, void *v)
{
        return __cpu_notify(val, v, -1, NULL);
}
 508
 509 #ifdef CONFIG_HOTPLUG_CPU
 510
/*
 * Like cpu_notify(), for events whose notifiers are not allowed to fail:
 * a non-zero result hits BUG_ON().
 */
static void cpu_notify_nofail(unsigned long val, void *v)
{
        BUG_ON(cpu_notify(val, v));
}
/*
 * Exports for the registration functions defined further up. Note that
 * these two lines sit inside the CONFIG_HOTPLUG_CPU section.
 */
EXPORT_SYMBOL(register_cpu_notifier);
EXPORT_SYMBOL(__register_cpu_notifier);
 517
/*
 * Remove @nb from the CPU notifier chain while holding
 * cpu_add_remove_lock. The result of the chain operation is not reported.
 */
void unregister_cpu_notifier(struct notifier_block *nb)
{
        cpu_maps_update_begin();
        raw_notifier_chain_unregister(&cpu_chain, nb);
        cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);
 525
/*
 * Lockless variant of unregister_cpu_notifier(): removes @nb from the
 * CPU notifier chain without taking cpu_add_remove_lock itself.
 */
void __unregister_cpu_notifier(struct notifier_block *nb)
{
        raw_notifier_chain_unregister(&cpu_chain, nb);
}
EXPORT_SYMBOL(__unregister_cpu_notifier);
 531
 532 /**
 533  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 534  * @cpu: a CPU id
 535  *
 536  * This function walks all processes, finds a valid mm struct for each one and
 537  * then clears a corresponding bit in mm's cpumask.  While this all sounds
 538  * trivial, there are various non-obvious corner cases, which this function
 539  * tries to solve in a safe manner.
 540  *
 541  * Also note that the function uses a somewhat relaxed locking scheme, so it may
 542  * be called only for an already offlined CPU.
 543  */
 544 void clear_tasks_mm_cpumask(int cpu)
 545 {
 546         struct task_struct *p;
 547
 548         /*
 549          * This function is called after the cpu is taken down and marked
 550          * offline, so its not like new tasks will ever get this cpu set in
 551          * their mm mask. -- Peter Zijlstra
 552          * Thus, we may use rcu_read_lock() here, instead of grabbing
 553          * full-fledged tasklist_lock.
 554          */
 555         WARN_ON(cpu_online(cpu));
 556         rcu_read_lock();
 557         for_each_process(p) {
 558                 struct task_struct *t;
 559
 560                 /*
 561                  * Main thread might exit, but other threads may still have
 562                  * a valid mm. Find one.
 563                  */
 564                 t = find_lock_task_mm(p);
 565                 if (!t)
 566                         continue;
 567                 cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
 568                 task_unlock(t);
 569         }
 570         rcu_read_unlock();
 571 }
 572
 573 static inline void check_for_tasks(int dead_cpu)
 574 {
 575         struct task_struct *g, *p;
 576
 577         read_lock(&tasklist_lock);
 578         for_each_process_thread(g, p) {
 579                 if (!p->on_rq)
 580                         continue;
 581                 /*
 582                  * We do the check with unlocked task_rq(p)->lock.
 583                  * Order the reading to do not warn about a task,
 584                  * which was running on this cpu in the past, and
 585                  * it's just been woken on another cpu.
 586                  */
 587                 rmb();
 588                 if (task_cpu(p) != dead_cpu)
 589                         continue;
 590
 591                 pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
 592                         p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
 593         }
 594         read_unlock(&tasklist_lock);
 595 }
 596
/* Argument bundle handed to take_cpu_down() through stop_machine(). */
struct take_cpu_down_param {
        /* modifier OR-ed into the notifier event (CPU_TASKS_FROZEN or 0) */
        unsigned long mod;
        /* the CPU number, cast to a pointer, as passed to the notifiers */
        void *hcpu;
};
 601
 602 /* Take this CPU down. */
 603 static int take_cpu_down(void *_param)
 604 {
 605         struct take_cpu_down_param *param = _param;
 606         int err;
 607
 608         /* Ensure this CPU doesn't handle any more interrupts. */
 609         err = __cpu_disable();
 610         if (err < 0)
 611                 return err;
 612
 613         cpu_notify(CPU_DYING | param->mod, param->hcpu);
 614         /* Give up timekeeping duties */
 615         tick_handover_do_timer();
 616         /* Park the stopper thread */
 617         stop_machine_park((long)param->hcpu);
 618         return 0;
 619 }
 620
 621 /* Requires cpu_add_remove_lock to be held */
 622 static int _cpu_down(unsigned int cpu, int tasks_frozen)
 623 {
 624         int mycpu, err, nr_calls = 0;
 625         void *hcpu = (void *)(long)cpu;
 626         unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 627         struct take_cpu_down_param tcd_param = {
 628                 .mod = mod,
 629                 .hcpu = hcpu,
 630         };
 631         cpumask_var_t cpumask;
 632         cpumask_var_t cpumask_org;
 633
 634         if (num_online_cpus() == 1)
 635                 return -EBUSY;
 636
 637         if (!cpu_online(cpu))
 638                 return -EINVAL;
 639
 640         /* Move the downtaker off the unplug cpu */
 641         if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
 642                 return -ENOMEM;
 643         if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL))  {
 644                 free_cpumask_var(cpumask);
 645                 return -ENOMEM;
 646         }
 647
 648         cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
 649         cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
 650         set_cpus_allowed_ptr(current, cpumask);
 651         free_cpumask_var(cpumask);
 652         migrate_disable();
 653         mycpu = smp_processor_id();
 654         if (mycpu == cpu) {
 655                 printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
 656                 migrate_enable();
 657                 err = -EBUSY;
 658                 goto restore_cpus;
 659         }
 660         migrate_enable();
 661
 662         cpu_hotplug_begin();
 663         err = cpu_unplug_begin(cpu);
 664         if (err) {
 665                 printk("cpu_unplug_begin(%d) failed\n", cpu);
 666                 goto out_cancel;
 667         }
 668
 669         err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
 670         if (err) {
 671                 nr_calls--;
 672                 __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
 673                 pr_warn("%s: attempt to take down CPU %u failed\n",
 674                         __func__, cpu);
 675                 goto out_release;
 676         }
 677
 678         /*
 679          * By now we've cleared cpu_active_mask, wait for all preempt-disabled
 680          * and RCU users of this state to go away such that all new such users
 681          * will observe it.
 682          *
 683          * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
 684          * not imply sync_sched(), so wait for both.
 685          *
 686          * Do sync before park smpboot threads to take care the rcu boost case.
 687          */
 688         if (IS_ENABLED(CONFIG_PREEMPT))
 689                 synchronize_rcu_mult(call_rcu, call_rcu_sched);
 690         else
 691                 synchronize_rcu();
 692
 693         __cpu_unplug_wait(cpu);
 694         smpboot_park_threads(cpu);
 695
 696         /* Notifiers are done. Don't let any more tasks pin this CPU. */
 697         cpu_unplug_sync(cpu);
 698
 699         /*
 700          * Prevent irq alloc/free while the dying cpu reorganizes the
 701          * interrupt affinities.
 702          */
 703         irq_lock_sparse();
 704
 705         /*
 706          * So now all preempt/rcu users must observe !cpu_active().
 707          */
 708         err = stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 709         if (err) {
 710                 /* CPU didn't die: tell everyone.  Can't complain. */
 711                 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
 712                 irq_unlock_sparse();
 713                 goto out_release;
 714         }
 715         BUG_ON(cpu_online(cpu));
 716
 717         /*
 718          * The migration_call() CPU_DYING callback will have removed all
 719          * runnable tasks from the cpu, there's only the idle task left now
 720          * that the migration thread is done doing the stop_machine thing.
 721          *
 722          * Wait for the stop thread to go away.
 723          */
 724         while (!per_cpu(cpu_dead_idle, cpu))
 725                 cpu_relax();
 726         smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */
 727         per_cpu(cpu_dead_idle, cpu) = false;
 728
 729         /* Interrupts are moved away from the dying cpu, reenable alloc/free */
 730         irq_unlock_sparse();
 731
 732         hotplug_cpu__broadcast_tick_pull(cpu);
 733         /* This actually kills the CPU. */
 734         __cpu_die(cpu);
 735
 736         /* CPU is completely dead: tell everyone.  Too late to complain. */
 737         tick_cleanup_dead_cpu(cpu);
 738         cpu_notify_nofail(CPU_DEAD | mod, hcpu);
 739
 740         check_for_tasks(cpu);
 741
 742 out_release:
 743         cpu_unplug_done(cpu);
 744 out_cancel:
 745         cpu_hotplug_done();
 746         if (!err)
 747                 cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
 748 restore_cpus:
 749         set_cpus_allowed_ptr(current, cpumask_org);
 750         free_cpumask_var(cpumask_org);
 751         return err;
 752 }
 753
 754 int cpu_down(unsigned int cpu)
 755 {
 756         int err;
 757
 758         cpu_maps_update_begin();
 759
 760         if (cpu_hotplug_disabled) {
 761                 err = -EBUSY;
 762                 goto out;
 763         }
 764
 765         err = _cpu_down(cpu, 0);
 766
 767 out:
 768         cpu_maps_update_done();
 769         return err;
 770 }
 771 EXPORT_SYMBOL(cpu_down);
 772 #endif /*CONFIG_HOTPLUG_CPU*/
 773
 774 /*
 775  * Unpark per-CPU smpboot kthreads at CPU-online time.
 776  */
 777 static int smpboot_thread_call(struct notifier_block *nfb,
 778                                unsigned long action, void *hcpu)
 779 {
 780         int cpu = (long)hcpu;
 781
 782         switch (action & ~CPU_TASKS_FROZEN) {
 783
 784         case CPU_DOWN_FAILED:
 785         case CPU_ONLINE:
 786                 smpboot_unpark_threads(cpu);
 787                 break;
 788
 789         default:
 790                 break;
 791         }
 792
 793         return NOTIFY_OK;
 794 }
 795
/* Notifier block hooking smpboot_thread_call() into the CPU chain. */
static struct notifier_block smpboot_thread_notifier = {
        .notifier_call = smpboot_thread_call,
        .priority = CPU_PRI_SMPBOOT,
};
 800
/* Register smpboot_thread_notifier with the CPU notifier chain. */
void smpboot_thread_init(void)
{
        register_cpu_notifier(&smpboot_thread_notifier);
}
 805
/*
 * Bring @cpu online.
 *
 * Requires cpu_add_remove_lock to be held.
 *
 * @tasks_frozen: non-zero adds CPU_TASKS_FROZEN to every notifier event.
 *
 * Returns 0 on success, -EINVAL if @cpu is already online or not present,
 * or the error from idle_thread_get(), smpboot_create_threads(), the
 * CPU_UP_PREPARE notifiers or __cpu_up().
 */
static int _cpu_up(unsigned int cpu, int tasks_frozen)
{
        int ret, nr_calls = 0;
        void *hcpu = (void *)(long)cpu;
        unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
        struct task_struct *idle;

        cpu_hotplug_begin();

        if (cpu_online(cpu) || !cpu_present(cpu)) {
                ret = -EINVAL;
                goto out;
        }

        idle = idle_thread_get(cpu);
        if (IS_ERR(idle)) {
                ret = PTR_ERR(idle);
                goto out;
        }

        ret = smpboot_create_threads(cpu);
        if (ret)
                goto out;

        ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
        if (ret) {
                /* Cancel only the notifiers that ran before the failure. */
                nr_calls--;
                pr_warn("%s: attempt to bring up CPU %u failed\n",
                        __func__, cpu);
                goto out_notify;
        }

        /* Arch-specific enabling code. */
        ret = __cpu_up(cpu, idle);

        if (ret != 0)
                goto out_notify;
        BUG_ON(!cpu_online(cpu));

        /* The CPU is online now: tell the notifiers. */
        cpu_notify(CPU_ONLINE | mod, hcpu);

out_notify:
        /* Also reached on success; only a failure sends CPU_UP_CANCELED. */
        if (ret != 0)
                __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
out:
        cpu_hotplug_done();

        return ret;
}
 857
 858 int cpu_up(unsigned int cpu)
 859 {
 860         int err = 0;
 861
 862         if (!cpu_possible(cpu)) {
 863                 pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
 864                        cpu);
 865 #if defined(CONFIG_IA64)
 866                 pr_err("please check additional_cpus= boot parameter\n");
 867 #endif
 868                 return -EINVAL;
 869         }
 870
 871         err = try_online_node(cpu_to_node(cpu));
 872         if (err)
 873                 return err;
 874
 875         cpu_maps_update_begin();
 876
 877         if (cpu_hotplug_disabled) {
 878                 err = -EBUSY;
 879                 goto out;
 880         }
 881
 882         err = _cpu_up(cpu, 0);
 883
 884 out:
 885         cpu_maps_update_done();
 886         return err;
 887 }
 888 EXPORT_SYMBOL_GPL(cpu_up);
 889
 890 #ifdef CONFIG_PM_SLEEP_SMP
/*
 * CPUs taken down by disable_nonboot_cpus(), to be brought back by
 * enable_nonboot_cpus(). Allocated by alloc_frozen_cpus().
 */
static cpumask_var_t frozen_cpus;
 892
 893 int disable_nonboot_cpus(void)
 894 {
 895         int cpu, first_cpu, error = 0;
 896
 897         cpu_maps_update_begin();
 898         first_cpu = cpumask_first(cpu_online_mask);
 899         /*
 900          * We take down all of the non-boot CPUs in one shot to avoid races
 901          * with the userspace trying to use the CPU hotplug at the same time
 902          */
 903         cpumask_clear(frozen_cpus);
 904
 905         pr_info("Disabling non-boot CPUs ...\n");
 906         for_each_online_cpu(cpu) {
 907                 if (cpu == first_cpu)
 908                         continue;
 909                 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
 910                 error = _cpu_down(cpu, 1);
 911                 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
 912                 if (!error)
 913                         cpumask_set_cpu(cpu, frozen_cpus);
 914                 else {
 915                         pr_err("Error taking CPU%d down: %d\n", cpu, error);
 916                         break;
 917                 }
 918         }
 919
 920         if (!error)
 921                 BUG_ON(num_online_cpus() > 1);
 922         else
 923                 pr_err("Non-boot CPUs are not disabled\n");
 924
 925         /*
 926          * Make sure the CPUs won't be enabled by someone else. We need to do
 927          * this even in case of failure as all disable_nonboot_cpus() users are
 928          * supposed to do enable_nonboot_cpus() on the failure path.
 929          */
 930         cpu_hotplug_disabled++;
 931
 932         cpu_maps_update_done();
 933         return error;
 934 }
 935
/*
 * Hook called by enable_nonboot_cpus() just before it starts bringing the
 * CPUs in frozen_cpus back up.  This weak default does nothing; being
 * __weak, it is replaced by any non-weak definition of the same symbol.
 */
void __weak arch_enable_nonboot_cpus_begin(void)
{
}
 939
/*
 * Hook called by enable_nonboot_cpus() right after it has attempted to bring
 * up every CPU in frozen_cpus.  This weak default does nothing; being
 * __weak, it is replaced by any non-weak definition of the same symbol.
 */
void __weak arch_enable_nonboot_cpus_end(void)
{
}
 943
 944 void enable_nonboot_cpus(void)
 945 {
 946         int cpu, error;
 947
 948         /* Allow everyone to use the CPU hotplug again */
 949         cpu_maps_update_begin();
 950         WARN_ON(--cpu_hotplug_disabled < 0);
 951         if (cpumask_empty(frozen_cpus))
 952                 goto out;
 953
 954         pr_info("Enabling non-boot CPUs ...\n");
 955
 956         arch_enable_nonboot_cpus_begin();
 957
 958         for_each_cpu(cpu, frozen_cpus) {
 959                 trace_suspend_resume(TPS("CPU_ON"), cpu, true);
 960                 error = _cpu_up(cpu, 1);
 961                 trace_suspend_resume(TPS("CPU_ON"), cpu, false);
 962                 if (!error) {
 963                         pr_info("CPU%d is up\n", cpu);
 964                         continue;
 965                 }
 966                 pr_warn("Error taking CPU%d up: %d\n", cpu, error);
 967         }
 968
 969         arch_enable_nonboot_cpus_end();
 970
 971         cpumask_clear(frozen_cpus);
 972 out:
 973         cpu_maps_update_done();
 974 }
 975
 976 static int __init alloc_frozen_cpus(void)
 977 {
 978         if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
 979                 return -ENOMEM;
 980         return 0;
 981 }
 982 core_initcall(alloc_frozen_cpus);
 983
 984 /*
 985  * When callbacks for CPU hotplug notifications are being executed, we must
 986  * ensure that the state of the system with respect to the tasks being frozen
 987  * or not, as reported by the notification, remains unchanged *throughout the
 988  * duration* of the execution of the callbacks.
 989  * Hence we need to prevent the freezer from racing with regular CPU hotplug.
 990  *
 991  * This synchronization is implemented by mutually excluding regular CPU
 992  * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
 993  * Hibernate notifications.
 994  */
 995 static int
 996 cpu_hotplug_pm_callback(struct notifier_block *nb,
 997                         unsigned long action, void *ptr)
 998 {
 999         switch (action) {
1000
1001         case PM_SUSPEND_PREPARE:
1002         case PM_HIBERNATION_PREPARE:
1003                 cpu_hotplug_disable();
1004                 break;
1005
1006         case PM_POST_SUSPEND:
1007         case PM_POST_HIBERNATION:
1008                 cpu_hotplug_enable();
1009                 break;
1010
1011         default:
1012                 return NOTIFY_DONE;
1013         }
1014
1015         return NOTIFY_OK;
1016 }
1017
1018
/*
 * Register cpu_hotplug_pm_callback() through pm_notifier(), passing 0 as the
 * second (priority) argument.  Run as a core_initcall; always returns 0.
 */
static int __init cpu_hotplug_pm_sync_init(void)
{
        /*
         * cpu_hotplug_pm_callback has higher priority than x86
         * bsp_pm_callback which depends on cpu_hotplug_pm_callback
         * to disable cpu hotplug to avoid cpu hotplug race.
         */
        pm_notifier(cpu_hotplug_pm_callback, 0);
        return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);
1030
1031 #endif /* CONFIG_PM_SLEEP_SMP */
1032
1033 /**
1034  * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
1035  * @cpu: cpu that just started
1036  *
1037  * This function calls the cpu_chain notifiers with CPU_STARTING.
1038  * It must be called by the arch code on the new cpu, before the new cpu
1039  * enables interrupts and before the "boot" cpu returns from __cpu_up().
1040  */
1041 void notify_cpu_starting(unsigned int cpu)
1042 {
1043         unsigned long val = CPU_STARTING;
1044
1045 #ifdef CONFIG_PM_SLEEP_SMP
1046         if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
1047                 val = CPU_STARTING_FROZEN;
1048 #endif /* CONFIG_PM_SLEEP_SMP */
1049         cpu_notify(val, (void *)(long)cpu);
1050 }
1051
1052 #endif /* CONFIG_SMP */
1053
/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents the binary values of 1<<nr for all NR_CPUS bits.
 *
 * It is used by cpumask_of() to get a constant address to a CPU
 * mask value that has a single bit set only.
 *
 * Layout, as built by the initializer below: the table has
 * BITS_PER_LONG + 1 rows of BITS_TO_LONGS(NR_CPUS) words each.  Row x + 1
 * (for x in 0 .. BITS_PER_LONG - 1) has bit x set in its first word; every
 * word without an explicit initializer, including all of row 0, is zero.
 */

/* cpu_bit_bitmap[0] is empty - so we can back into it */
/* MASK_DECLARE_1(x): initializer for row x + 1, first word = 1UL << x. */
#define MASK_DECLARE_1(x)       [x+1][0] = (1UL << (x))
/* MASK_DECLARE_<N>(x): initializers for N consecutive rows starting at bit x. */
#define MASK_DECLARE_2(x)       MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)       MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)       MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)

const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {

        /* Bits 0..31 are always present. */
        MASK_DECLARE_8(0),      MASK_DECLARE_8(8),
        MASK_DECLARE_8(16),     MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
        /* Bits 32..63 only exist when a long is wider than 32 bits. */
        MASK_DECLARE_8(32),     MASK_DECLARE_8(40),
        MASK_DECLARE_8(48),     MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
1078
/* Constant NR_CPUS-bit bitmap initialised to CPU_BITS_ALL. */
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
1081
/*
 * Backing storage for cpu_possible_mask.  With CONFIG_INIT_ALL_POSSIBLE the
 * bitmap starts out as CPU_BITS_ALL; otherwise it has no initializer and so
 * starts out all-zero.  It is written through set_cpu_possible() and
 * init_cpu_possible(); cpu_possible_mask is the exported const view of it.
 */
#ifdef CONFIG_INIT_ALL_POSSIBLE
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
        = CPU_BITS_ALL;
#else
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
#endif
const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
EXPORT_SYMBOL(cpu_possible_mask);
1090
/*
 * Backing storage for cpu_online_mask.  Written through set_cpu_online() and
 * init_cpu_online(); cpu_online_mask is the exported const view of it.
 */
static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
EXPORT_SYMBOL(cpu_online_mask);
1094
/*
 * Backing storage for cpu_present_mask.  Written through set_cpu_present()
 * and init_cpu_present(); cpu_present_mask is the exported const view of it.
 */
static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
EXPORT_SYMBOL(cpu_present_mask);
1098
/*
 * Backing storage for cpu_active_mask.  Written through set_cpu_active() and
 * also set by set_cpu_online() when a CPU is marked online; cpu_active_mask
 * is the exported const view of it.
 */
static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
EXPORT_SYMBOL(cpu_active_mask);
1102
1103 void set_cpu_possible(unsigned int cpu, bool possible)
1104 {
1105         if (possible)
1106                 cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
1107         else
1108                 cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
1109 }
1110
1111 void set_cpu_present(unsigned int cpu, bool present)
1112 {
1113         if (present)
1114                 cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
1115         else
1116                 cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
1117 }
1118
1119 void set_cpu_online(unsigned int cpu, bool online)
1120 {
1121         if (online) {
1122                 cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
1123                 cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
1124         } else {
1125                 cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
1126         }
1127 }
1128
1129 void set_cpu_active(unsigned int cpu, bool active)
1130 {
1131         if (active)
1132                 cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
1133         else
1134                 cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
1135 }
1136
1137 void init_cpu_present(const struct cpumask *src)
1138 {
1139         cpumask_copy(to_cpumask(cpu_present_bits), src);
1140 }
1141
1142 void init_cpu_possible(const struct cpumask *src)
1143 {
1144         cpumask_copy(to_cpumask(cpu_possible_bits), src);
1145 }
1146
1147 void init_cpu_online(const struct cpumask *src)
1148 {
1149         cpumask_copy(to_cpumask(cpu_online_bits), src);
1150 }