kernel/kernel/cpu.c

   1 /* CPU control.
   2  * (C) 2001, 2002, 2003, 2004 Rusty Russell
   3  *
   4  * This code is licenced under the GPL.
   5  */
   6 #include <linux/proc_fs.h>
   7 #include <linux/smp.h>
   8 #include <linux/init.h>
   9 #include <linux/notifier.h>
  10 #include <linux/sched.h>
  11 #include <linux/unistd.h>
  12 #include <linux/cpu.h>
  13 #include <linux/oom.h>
  14 #include <linux/rcupdate.h>
  15 #include <linux/export.h>
  16 #include <linux/bug.h>
  17 #include <linux/kthread.h>
  18 #include <linux/stop_machine.h>
  19 #include <linux/mutex.h>
  20 #include <linux/gfp.h>
  21 #include <linux/suspend.h>
  22 #include <linux/lockdep.h>
  23 #include <linux/tick.h>
  24 #include <trace/events/power.h>
  25
  26 #include "smpboot.h"
  27
  28 #ifdef CONFIG_SMP
  29 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
  30 static DEFINE_MUTEX(cpu_add_remove_lock);
  31
  32 /*
  33  * The following two APIs (cpu_maps_update_begin/done) must be used when
  34  * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
  35  * The APIs cpu_notifier_register_begin/done() must be used to protect CPU
  36  * hotplug callback (un)registration performed using __register_cpu_notifier()
  37  * or __unregister_cpu_notifier().
  38  */
     /*
      * cpu_maps_update_begin - take cpu_add_remove_lock.
      *
      * Sleeps until the mutex is available; must be paired with
      * cpu_maps_update_done().
      */
  39 void cpu_maps_update_begin(void)
  40 {
  41         mutex_lock(&cpu_add_remove_lock);
  42 }
     /*
      * NOTE(review): the exported name is not the function defined just above.
      * Presumably <linux/cpu.h> aliases cpu_notifier_register_begin to
      * cpu_maps_update_begin so this exports the function above -- confirm
      * against the header before touching this line.
      */
  43 EXPORT_SYMBOL(cpu_notifier_register_begin);
  44
     /*
      * cpu_maps_update_done - release cpu_add_remove_lock.
      *
      * Counterpart of cpu_maps_update_begin().
      */
  45 void cpu_maps_update_done(void)
  46 {
  47         mutex_unlock(&cpu_add_remove_lock);
  48 }
     /*
      * NOTE(review): exported under the name cpu_notifier_register_done;
      * presumably a header alias for cpu_maps_update_done -- confirm.
      */
  49 EXPORT_SYMBOL(cpu_notifier_register_done);
  50
     /*
      * Notifier chain walked by __cpu_notify(). Entries are added and removed
      * by the (__)register_cpu_notifier()/(__)unregister_cpu_notifier() helpers.
      */
  51 static RAW_NOTIFIER_HEAD(cpu_chain);
  52
  53 /* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  54  * Should always be manipulated under cpu_add_remove_lock
  55  */
  56 static int cpu_hotplug_disabled;
  57
  58 #ifdef CONFIG_HOTPLUG_CPU
  59
     /*
      * State shared between hotplug readers (get_online_cpus() /
      * try_get_online_cpus() / put_online_cpus()) and the single hotplug
      * writer (cpu_hotplug_begin() / cpu_hotplug_done()).
      */
  60 static struct {
             /*
              * Task currently performing a hotplug operation, or NULL. Set in
              * cpu_hotplug_begin() and cleared in cpu_hotplug_done(); the reader
              * helpers return early when it equals current.
              */
  61         struct task_struct *active_writer;
  62         /* wait queue to wake up the active_writer */
  63         wait_queue_head_t wq;
  64         /* verifies that no writer will get active while readers are active */
  65         struct mutex lock;
  66         /*
  67          * Also blocks the new readers during
  68          * an ongoing cpu hotplug operation.
  69          */
             /*
              * Number of active readers: incremented under @lock by the get
              * helpers and decremented by put_online_cpus(). Not listed in the
              * initializer below, so it starts out zero like any static object.
              */
  70         atomic_t refcount;
  71
  72 #ifdef CONFIG_DEBUG_LOCK_ALLOC
             /* lockdep map used by the cpuhp_lock_*() annotation macros */
  73         struct lockdep_map dep_map;
  74 #endif
  75 } cpu_hotplug = {
  76         .active_writer = NULL,
  77         .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
  78         .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
  79 #ifdef CONFIG_DEBUG_LOCK_ALLOC
  80         .dep_map = {.name = "cpu_hotplug.lock" },
  81 #endif
  82 };
  83
  84 /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
     /*
      * All four wrappers operate on cpu_hotplug.dep_map:
      *   cpuhp_lock_acquire_read()    - used by get_online_cpus()
      *   cpuhp_lock_acquire_tryread() - used by try_get_online_cpus()
      *   cpuhp_lock_acquire()         - used by cpu_hotplug_begin()
      *   cpuhp_lock_release()         - used by put_online_cpus() and
      *                                  cpu_hotplug_done()
      */
  85 #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
  86 #define cpuhp_lock_acquire_tryread() \
  87                                   lock_map_acquire_tryread(&cpu_hotplug.dep_map)
  88 #define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
  89 #define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
  90
  91 /**
  92  * hotplug_pcp  - per cpu hotplug descriptor
  93  * @unplug:     set when pin_current_cpu() needs to sync tasks
  94  * @sync_tsk:   the task that waits for tasks to finish pinned sections
  95  * @refcount:   counter of tasks in pinned sections
  96  * @grab_lock:  set when the tasks entering pinned sections should wait
  97  * @synced:     notifier for @sync_tsk to tell cpu_down it's finished
      * @unplug_wait: completed by __cpu_unplug_wait(); @sync_tsk blocks on it
      *              before it publishes itself in @unplug
      * @lock:       PREEMPT_RT_FULL replacement for @mutex (see comment below)
  98  * @mutex:      the mutex to make tasks wait (used when @grab_lock is true)
  99  * @mutex_init: zero if the mutex hasn't been initialized yet.
 100  *
 101  * Although @unplug and @sync_tsk may point to the same task, the @unplug
 102  * is used as a flag and still exists after @sync_tsk has exited and
 103  * @sync_tsk set to NULL.
 104  */
 105 struct hotplug_pcp {
 106         struct task_struct *unplug;
 107         struct task_struct *sync_tsk;
 108         int refcount;
 109         int grab_lock;
 110         struct completion synced;
 111         struct completion unplug_wait;
 112 #ifdef CONFIG_PREEMPT_RT_FULL
 113         /*
 114          * Note, on PREEMPT_RT, the hotplug lock must save the state of
 115          * the task, otherwise the mutex will cause the task to fail
 116          * to sleep when required. (Because it's called from migrate_disable())
 117          *
 118          * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
 119          * state.
 120          */
 121         spinlock_t lock;
 122 #else
 123         struct mutex mutex;
 124 #endif
 125         int mutex_init;
 126 };
 127
     /*
      * hotplug_lock()/hotplug_unlock() hide which lock type struct hotplug_pcp
      * carries: the spinlock_t @lock on PREEMPT_RT_FULL, the @mutex otherwise.
      */
 128 #ifdef CONFIG_PREEMPT_RT_FULL
 129 # define hotplug_lock(hp) rt_spin_lock(&(hp)->lock)
 130 # define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock)
 131 #else
 132 # define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
 133 # define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
 134 #endif
 135
     /* One hotplug descriptor per CPU; see struct hotplug_pcp. */
 136 static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
 137
 138 /**
 139  * pin_current_cpu - Prevent the current cpu from being unplugged
 140  *
 141  * Lightweight version of get_online_cpus() to prevent cpu from being
 142  * unplugged when code runs in a migration disabled region.
 143  *
 144  * Must be called with preemption disabled (preempt_count = 1)!
      *
      * On return the per-CPU refcount of the CPU the task ended up on has been
      * incremented; undo with unpin_current_cpu(). The function may enable
      * preemption and sleep internally, but preemption is disabled again
      * whenever it returns.
 145  */
 146 void pin_current_cpu(void)
 147 {
 148         struct hotplug_pcp *hp;
 149         int force = 0;
 150
 151 retry:
             /* Re-read on every pass: the task may have changed CPU meanwhile. */
 152         hp = this_cpu_ptr(&hotplug_pcp);
 153
             /*
              * Pin right away when no unplug is in progress, when other tasks
              * already hold pins, when a previous pass decided to force it,
              * when the caller has preemption disabled more than once, or when
              * the caller is the unplug task itself.
              */
 154         if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
 155             hp->unplug == current) {
 156                 hp->refcount++;
 157                 return;
 158         }
 159         if (hp->grab_lock) {
                     /*
                      * cpu_unplug_sync() holds the hotplug lock while grab_lock is
                      * set; taking and dropping it makes us wait until
                      * cpu_unplug_done() releases it.
                      */
 160                 preempt_enable();
 161                 hotplug_lock(hp);
 162                 hotplug_unlock(hp);
 163         } else {
 164                 preempt_enable();
 165                 /*
 166                  * Try to push this task off of this CPU.
 167                  */
 168                 if (!migrate_me()) {
 169                         preempt_disable();
 170                         hp = this_cpu_ptr(&hotplug_pcp);
 171                         if (!hp->grab_lock) {
 172                                 /*
 173                                  * Just let it continue it's already pinned
 174                                  * or about to sleep.
 175                                  */
 176                                 force = 1;
 177                                 goto retry;
 178                         }
                             /* grab_lock got set meanwhile: retry via the lock path */
 179                         preempt_enable();
 180                 }
 181         }
 182         preempt_disable();
 183         goto retry;
 184 }
 185
 186 /**
 187  * unpin_current_cpu - Allow unplug of current cpu
 188  *
 189  * Must be called with preemption or interrupts disabled!
      *
      * Drops one reference taken by pin_current_cpu(). Warns if the counter was
      * not positive on entry.
 190  */
 191 void unpin_current_cpu(void)
 192 {
 193         struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
 194
 195         WARN_ON(hp->refcount <= 0);
 196
 197         /* This is safe. sync_unplug_thread is pinned to this cpu */
             /*
              * Last pin gone while an unplug is pending: wake the unplug task
              * (unless that is the caller) so wait_for_pinned_cpus() can re-check.
              */
 198         if (!--hp->refcount && hp->unplug && hp->unplug != current)
 199                 wake_up_process(hp->unplug);
 200 }
 201
     /*
      * Sleep until @hp->refcount drops to zero.
      *
      * Uses schedule_preempt_disabled(), so callers invoke it with preemption
      * disabled (both call sites in sync_unplug_thread() do). The task state is
      * set before each check of the counter and is still TASK_UNINTERRUPTIBLE
      * when the function returns.
      */
 202 static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
 203 {
 204         set_current_state(TASK_UNINTERRUPTIBLE);
 205         while (hp->refcount) {
 206                 schedule_preempt_disabled();
 207                 set_current_state(TASK_UNINTERRUPTIBLE);
 208         }
 209 }
 210
     /*
      * Body of the "sync_unplug/%d" kthread created by cpu_unplug_begin().
      * @data is the struct hotplug_pcp of the CPU being taken down.
      *
      * Phases:
      *  1. wait for __cpu_unplug_wait() to complete @unplug_wait;
      *  2. publish itself in @unplug, wait for all pins to drop, signal @synced;
      *  3. sleep until @grab_lock is set or the thread is asked to stop;
      *  4. unless stopping, wait for pins again and signal @synced a second time;
      *  5. sleep until kthread_stop(), then move itself off the CPU.
      *
      * Always returns 0.
      */
 211 static int sync_unplug_thread(void *data)
 212 {
 213         struct hotplug_pcp *hp = data;
 214
 215         wait_for_completion(&hp->unplug_wait);
 216         preempt_disable();
             /* From here on pin_current_cpu() sees an unplug in progress. */
 217         hp->unplug = current;
 218         wait_for_pinned_cpus(hp);
 219
 220         /*
 221          * This thread will synchronize the cpu_down() with threads
 222          * that have pinned the CPU. When the pinned CPU count reaches
 223          * zero, we inform the cpu_down code to continue to the next step.
 224          */
 225         set_current_state(TASK_UNINTERRUPTIBLE);
 226         preempt_enable();
 227         complete(&hp->synced);
 228
 229         /*
 230          * If all succeeds, the next step will need tasks to wait till
 231          * the CPU is offline before continuing. To do this, the grab_lock
 232          * is set and tasks going into pin_current_cpu() will block on the
 233          * mutex. But we still need to wait for those that are already in
 234          * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
 235          * will kick this thread out.
 236          */
 237         while (!hp->grab_lock && !kthread_should_stop()) {
 238                 schedule();
 239                 set_current_state(TASK_UNINTERRUPTIBLE);
 240         }
 241
 242         /* Make sure grab_lock is seen before we see a stale completion */
 243         smp_mb();
 244
 245         /*
 246          * Now just before cpu_down() enters stop machine, we need to make
 247          * sure all tasks that are in pinned CPU sections are out, and new
 248          * tasks will now grab the lock, keeping them from entering pinned
 249          * CPU sections.
 250          */
 251         if (!kthread_should_stop()) {
 252                 preempt_disable();
 253                 wait_for_pinned_cpus(hp);
 254                 preempt_enable();
                     /* Second handshake: releases the waiter in __cpu_unplug_sync(). */
 255                 complete(&hp->synced);
 256         }
 257
             /* Park here until cpu_unplug_sync()/cpu_unplug_done() stops us. */
 258         set_current_state(TASK_UNINTERRUPTIBLE);
 259         while (!kthread_should_stop()) {
 260                 schedule();
 261                 set_current_state(TASK_UNINTERRUPTIBLE);
 262         }
 263         set_current_state(TASK_RUNNING);
 264
 265         /*
 266          * Force this thread off this CPU as it's going down and
 267          * we don't want any more work on this CPU.
 268          */
 269         current->flags &= ~PF_NO_SETAFFINITY;
 270         set_cpus_allowed_ptr(current, cpu_present_mask);
 271         migrate_me();
 272         return 0;
 273 }
 274
     /*
      * Wake the sync thread of @hp and block until it signals @synced.
      * Called from cpu_unplug_sync() after grab_lock has been set.
      */
 275 static void __cpu_unplug_sync(struct hotplug_pcp *hp)
 276 {
 277         wake_up_process(hp->sync_tsk);
 278         wait_for_completion(&hp->synced);
 279 }
 280
     /*
      * Let the sync thread of @cpu start its first phase (by completing
      * @unplug_wait) and block until it signals @synced, i.e. until the tasks
      * that were pinned to @cpu have left their pinned sections.
      */
 281 static void __cpu_unplug_wait(unsigned int cpu)
 282 {
 283         struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
 284
 285         complete(&hp->unplug_wait);
 286         wait_for_completion(&hp->synced);
 287 }
 288
 289 /*
 290  * Start the sync_unplug_thread on the target cpu and wait for it to
 291  * complete.
      *
      * Lazily initializes the per-CPU hotplug lock, notifies the scheduler via
      * tell_sched_cpu_down_begin(), (re)initializes both completions and creates
      * the "sync_unplug/%d" kthread bound to @cpu.
      *
      * Returns 0 on success or the error from kthread_create(). On failure the
      * scheduler notification is undone here, because the caller (_cpu_down())
      * skips cpu_unplug_done() -- the only other place that calls
      * tell_sched_cpu_down_done() -- when this function fails.
 292  */
 293 static int cpu_unplug_begin(unsigned int cpu)
 294 {
 295         struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
 296         int err;
 297
 298         /* Protected by cpu_hotplug.lock */
 299         if (!hp->mutex_init) {
 300 #ifdef CONFIG_PREEMPT_RT_FULL
 301                 spin_lock_init(&hp->lock);
 302 #else
 303                 mutex_init(&hp->mutex);
 304 #endif
 305                 hp->mutex_init = 1;
 306         }
 307
 308         /* Inform the scheduler to migrate tasks off this CPU */
 309         tell_sched_cpu_down_begin(cpu);
 310
 311         init_completion(&hp->synced);
 312         init_completion(&hp->unplug_wait);
 313
 314         hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
 315         if (IS_ERR(hp->sync_tsk)) {
 316                 err = PTR_ERR(hp->sync_tsk);
 317                 hp->sync_tsk = NULL;
                     /* Balance tell_sched_cpu_down_begin(); nobody else will. */
                     tell_sched_cpu_down_done(cpu);
 318                 return err;
 319         }
 320         kthread_bind(hp->sync_tsk, cpu);
 321
 322         /*
 323          * Wait for tasks to get out of the pinned sections,
 324          * it's still OK if new tasks enter. Some CPU notifiers will
 325          * wait for tasks that are going to enter these sections and
 326          * we must not have them block.
 327          */
 328         wake_up_process(hp->sync_tsk);
 329         return 0;
 330 }
 331
     /*
      * Second synchronization step of cpu_down for @cpu: take the per-CPU
      * hotplug lock, set grab_lock so that new pin_current_cpu() callers block
      * on that lock, wait for the sync thread to report that all pinned tasks
      * are out, then stop the sync thread.
      *
      * Returns with the hotplug lock still held; cpu_unplug_done() drops it.
      */
 332 static void cpu_unplug_sync(unsigned int cpu)
 333 {
 334         struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
 335
 336         init_completion(&hp->synced);
 337         /* The completion needs to be initialized before setting grab_lock */
 338         smp_wmb();
 339
 340         /* Grab the mutex before setting grab_lock */
 341         hotplug_lock(hp);
 342         hp->grab_lock = 1;
 343
 344         /*
 345          * The CPU notifiers have been completed.
 346          * Wait for tasks to get out of pinned CPU sections and have new
 347          * tasks block until the CPU is completely down.
 348          */
 349         __cpu_unplug_sync(hp);
 350
 351         /* All done with the sync thread */
 352         kthread_stop(hp->sync_tsk);
 353         hp->sync_tsk = NULL;
 354 }
 355
     /*
      * Tear down the unplug state of @cpu, on both the success and the failure
      * path of _cpu_down(): clear the unplug marker, stop the sync thread if it
      * is still around, release the hotplug lock taken by cpu_unplug_sync() and
      * tell the scheduler that the operation is over.
      */
 356 static void cpu_unplug_done(unsigned int cpu)
 357 {
 358         struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
 359
 360         hp->unplug = NULL;
 361         /* Let all tasks know cpu unplug is finished before cleaning up */
 362         smp_wmb();
 363
             /* Still set only when cpu_unplug_sync() did not run (failure path). */
 364         if (hp->sync_tsk)
 365                 kthread_stop(hp->sync_tsk);
 366
 367         if (hp->grab_lock) {
 368                 hotplug_unlock(hp);
 369                 /* protected by cpu_hotplug.lock */
 370                 hp->grab_lock = 0;
 371         }
 372         tell_sched_cpu_down_done(cpu);
 373 }
 374
     /*
      * get_online_cpus - enter a hotplug read-side section.
      *
      * May sleep. Does nothing when called by the active hotplug writer.
      * Otherwise takes cpu_hotplug.lock briefly -- which blocks while a writer
      * holds it -- and bumps the reader count. Pair with put_online_cpus().
      */
 375 void get_online_cpus(void)
 376 {
 377         might_sleep();
 378         if (cpu_hotplug.active_writer == current)
 379                 return;
 380         cpuhp_lock_acquire_read();
 381         mutex_lock(&cpu_hotplug.lock);
 382         atomic_inc(&cpu_hotplug.refcount);
 383         mutex_unlock(&cpu_hotplug.lock);
 384 }
 385 EXPORT_SYMBOL_GPL(get_online_cpus);
 386
     /*
      * try_get_online_cpus - non-blocking variant of get_online_cpus().
      *
      * Returns true when the caller is the active writer or when the reader
      * count was incremented; returns false, without taking a reference, when
      * cpu_hotplug.lock could not be acquired immediately.
      */
 387 bool try_get_online_cpus(void)
 388 {
 389         if (cpu_hotplug.active_writer == current)
 390                 return true;
 391         if (!mutex_trylock(&cpu_hotplug.lock))
 392                 return false;
 393         cpuhp_lock_acquire_tryread();
 394         atomic_inc(&cpu_hotplug.refcount);
 395         mutex_unlock(&cpu_hotplug.lock);
 396         return true;
 397 }
 398 EXPORT_SYMBOL_GPL(try_get_online_cpus);
 399
     /*
      * put_online_cpus - leave a hotplug read-side section.
      *
      * Does nothing when called by the active hotplug writer. Otherwise drops
      * one reader reference and, when the count reaches zero, wakes a writer
      * sleeping in cpu_hotplug_begin().
      */
 400 void put_online_cpus(void)
 401 {
 402         int refcount;
 403
 404         if (cpu_hotplug.active_writer == current)
 405                 return;
 406
 407         refcount = atomic_dec_return(&cpu_hotplug.refcount);
             /* Unbalanced put: warn and restore the counter to where it was. */
 408         if (WARN_ON(refcount < 0)) /* try to fix things up */
 409                 atomic_inc(&cpu_hotplug.refcount);
 410
             /* No readers left: wake the writer, if one is queued on the wait queue. */
 411         if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
 412                 wake_up(&cpu_hotplug.wq);
 413
 414         cpuhp_lock_release();
 415
 416 }
 417 EXPORT_SYMBOL_GPL(put_online_cpus);
 418
 419 /*
 420  * This ensures that the hotplug operation can begin only when the
 421  * refcount goes to zero.
 422  *
 423  * Note that during a cpu-hotplug operation, the new readers, if any,
 424  * will be blocked by the cpu_hotplug.lock
 425  *
 426  * Since cpu_hotplug_begin() is always called after invoking
 427  * cpu_maps_update_begin(), we can be sure that only one writer is active.
 428  *
 429  * Note that theoretically, there is a possibility of a livelock:
 430  * - Refcount goes to zero, last reader wakes up the sleeping
 431  *   writer.
 432  * - Last reader unlocks the cpu_hotplug.lock.
 433  * - A new reader arrives at this moment, bumps up the refcount.
 434  * - The writer acquires the cpu_hotplug.lock finds the refcount
 435  *   non zero and goes to sleep again.
 436  *
 437  * However, this is very difficult to achieve in practice since
 438  * get_online_cpus() is not an API which is called all that often.
 439  *
      * Returns with cpu_hotplug.lock held and active_writer set to current;
      * both are undone by cpu_hotplug_done().
 440  */
 441 void cpu_hotplug_begin(void)
 442 {
 443         DEFINE_WAIT(wait);
 444
 445         cpu_hotplug.active_writer = current;
 446         cpuhp_lock_acquire();
 447
 448         for (;;) {
 449                 mutex_lock(&cpu_hotplug.lock);
                     /* Queue ourselves before checking so a wake-up cannot be missed. */
 450                 prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
 451                 if (likely(!atomic_read(&cpu_hotplug.refcount)))
 452                                 break;
                     /* Readers still active: drop the lock and sleep until woken. */
 453                 mutex_unlock(&cpu_hotplug.lock);
 454                 schedule();
 455         }
 456         finish_wait(&cpu_hotplug.wq, &wait);
 457 }
 458
     /*
      * cpu_hotplug_done - end the write-side section opened by
      * cpu_hotplug_begin(): clear active_writer and release cpu_hotplug.lock so
      * that blocked readers can proceed.
      */
 459 void cpu_hotplug_done(void)
 460 {
 461         cpu_hotplug.active_writer = NULL;
 462         mutex_unlock(&cpu_hotplug.lock);
 463         cpuhp_lock_release();
 464 }
 465
 466 /*
 467  * Wait for currently running CPU hotplug operations to complete (if any) and
 468  * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 469  * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 470  * hotplug path before performing hotplug operations. So acquiring that lock
 471  * guarantees mutual exclusion from any currently running hotplug operations.
      *
      * While the flag is set, cpu_up() and cpu_down() return -EBUSY.
      * Undo with cpu_hotplug_enable().
 472  */
 473 void cpu_hotplug_disable(void)
 474 {
 475         cpu_maps_update_begin();
 476         cpu_hotplug_disabled = 1;
 477         cpu_maps_update_done();
 478 }
 479
     /*
      * cpu_hotplug_enable - clear cpu_hotplug_disabled under
      * cpu_add_remove_lock, allowing cpu_up()/cpu_down() to proceed again.
      */
 480 void cpu_hotplug_enable(void)
 481 {
 482         cpu_maps_update_begin();
 483         cpu_hotplug_disabled = 0;
 484         cpu_maps_update_done();
 485 }
 486
 487 #endif  /* CONFIG_HOTPLUG_CPU */
 488
 489 /* Need to know about CPUs going up/down? */
     /*
      * register_cpu_notifier - add @nb to the CPU notifier chain.
      *
      * Takes cpu_add_remove_lock around the registration. Returns whatever
      * raw_notifier_chain_register() returns.
      */
 490 int __ref register_cpu_notifier(struct notifier_block *nb)
 491 {
 492         int ret;
 493         cpu_maps_update_begin();
 494         ret = raw_notifier_chain_register(&cpu_chain, nb);
 495         cpu_maps_update_done();
 496         return ret;
 497 }
 498
     /*
      * __register_cpu_notifier - lockless variant of register_cpu_notifier().
      *
      * Takes no lock itself; per the comment at the top of this file, callers
      * serialize with cpu_notifier_register_begin()/done().
      */
 499 int __ref __register_cpu_notifier(struct notifier_block *nb)
 500 {
 501         return raw_notifier_chain_register(&cpu_chain, nb);
 502 }
 503
     /*
      * Run the CPU notifier chain for event @val with argument @v.
      *
      * @nr_to_call and @nr_calls are passed straight through to
      * __raw_notifier_call_chain(); callers in this file pass -1/NULL for an
      * ordinary notification and use @nr_calls to replay a bounded number of
      * callbacks when rolling back. The chain result is converted with
      * notifier_to_errno() before being returned.
      */
 504 static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
 505                         int *nr_calls)
 506 {
 507         int ret;
 508
 509         ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
 510                                         nr_calls);
 511
 512         return notifier_to_errno(ret);
 513 }
 514
     /* Notify the CPU chain of @val: __cpu_notify() with no call limit or counter. */
 515 static int cpu_notify(unsigned long val, void *v)
 516 {
 517         return __cpu_notify(val, v, -1, NULL);
 518 }
 519
 520 #ifdef CONFIG_HOTPLUG_CPU
 521
     /*
      * Notify the whole chain of an event that callbacks are not allowed to
      * reject: any non-zero result from cpu_notify() triggers BUG().
      */
 522 static void cpu_notify_nofail(unsigned long val, void *v)
 523 {
 524         BUG_ON(cpu_notify(val, v));
 525 }
     /*
      * NOTE(review): these exports sit inside the CONFIG_HOTPLUG_CPU section
      * although the functions are defined outside of it -- confirm that this
      * placement is intentional.
      */
 526 EXPORT_SYMBOL(register_cpu_notifier);
 527 EXPORT_SYMBOL(__register_cpu_notifier);
 528
     /*
      * unregister_cpu_notifier - remove @nb from the CPU notifier chain while
      * holding cpu_add_remove_lock. The unregister result is discarded.
      */
 529 void __ref unregister_cpu_notifier(struct notifier_block *nb)
 530 {
 531         cpu_maps_update_begin();
 532         raw_notifier_chain_unregister(&cpu_chain, nb);
 533         cpu_maps_update_done();
 534 }
 535 EXPORT_SYMBOL(unregister_cpu_notifier);
 536
     /*
      * __unregister_cpu_notifier - lockless variant of
      * unregister_cpu_notifier(); callers serialize with
      * cpu_notifier_register_begin()/done() themselves.
      */
 537 void __ref __unregister_cpu_notifier(struct notifier_block *nb)
 538 {
 539         raw_notifier_chain_unregister(&cpu_chain, nb);
 540 }
 541 EXPORT_SYMBOL(__unregister_cpu_notifier);
 542
 543 /**
 544  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 545  * @cpu: a CPU id
 546  *
 547  * Visits every process, locates a thread of it that still owns an mm and
 548  * clears @cpu in that mm's cpumask. Processes without any such thread are
 549  * skipped.
 550  *
 551  * The walk runs under rcu_read_lock() only, which is why this may be
 552  * called only for an already offlined CPU; a warning is emitted if @cpu is
 553  * still online.
 554  */
 555 void clear_tasks_mm_cpumask(int cpu)
 556 {
 557         struct task_struct *proc;
 558
 559         /*
 560          * The CPU has already been taken down and marked offline, so no
 561          * task can newly get this cpu set in its mm mask. That is what
 562          * makes rcu_read_lock() sufficient here, instead of grabbing the
 563          * full-fledged tasklist_lock.
 564          */
 565         WARN_ON(cpu_online(cpu));
 566         rcu_read_lock();
 567         for_each_process(proc) {
 568                 /*
 569                  * The main thread might have exited while other threads
 570                  * still hold a valid mm; find_lock_task_mm() finds one
 571                  * and returns it with its task lock held.
 572                  */
 573                 struct task_struct *owner = find_lock_task_mm(proc);
 574
 575                 if (owner) {
 576                         cpumask_clear_cpu(cpu, mm_cpumask(owner->mm));
 577                         task_unlock(owner);
 578                 }
 579         }
 580         rcu_read_unlock();
 582 }
 583
     /*
      * Debug aid run after a CPU has died: walk every thread under
      * tasklist_lock and print a warning for each one that is still on a
      * runqueue (p->on_rq) with task_cpu() equal to @dead_cpu.
      */
 584 static inline void check_for_tasks(int dead_cpu)
 585 {
 586         struct task_struct *g, *p;
 587
 588         read_lock_irq(&tasklist_lock);
 589         do_each_thread(g, p) {
 590                 if (!p->on_rq)
 591                         continue;
 592                 /*
 593                  * We do the check with unlocked task_rq(p)->lock.
 594                  * Order the reading to do not warn about a task,
 595                  * which was running on this cpu in the past, and
 596                  * it's just been woken on another cpu.
 597                  */
 598                 rmb();
 599                 if (task_cpu(p) != dead_cpu)
 600                         continue;
 601
 602                 pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
 603                         p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
 604         } while_each_thread(g, p);
 605         read_unlock_irq(&tasklist_lock);
 606 }
 607
     /*
      * Argument bundle handed from _cpu_down() to take_cpu_down():
      * @mod is OR-ed into the notifier event (CPU_TASKS_FROZEN or 0) and
      * @hcpu is the CPU number cast to a pointer for the notifier chain.
      */
 608 struct take_cpu_down_param {
 609         unsigned long mod;
 610         void *hcpu;
 611 };
 612
 613 /* Take this CPU down. */
     /*
      * Callback passed to __stop_machine() by _cpu_down(), which restricts it
      * to the CPU being removed. @_param points to a struct take_cpu_down_param.
      *
      * Returns the negative error from __cpu_disable() if that fails (nothing
      * else is done in that case), 0 otherwise.
      */
 614 static int __ref take_cpu_down(void *_param)
 615 {
 616         struct take_cpu_down_param *param = _param;
 617         int err;
 618
 619         /* Ensure this CPU doesn't handle any more interrupts. */
 620         err = __cpu_disable();
 621         if (err < 0)
 622                 return err;
 623
             /* The result of the CPU_DYING notification is deliberately ignored. */
 624         cpu_notify(CPU_DYING | param->mod, param->hcpu);
 625         /* Give up timekeeping duties */
 626         tick_handover_do_timer();
 627         /* Park the stopper thread */
 628         kthread_park(current);
 629         return 0;
 630 }
 631
 632 /* Requires cpu_add_remove_lock to be held */
 633 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 634 {
 635         int mycpu, err, nr_calls = 0;
 636         void *hcpu = (void *)(long)cpu;
 637         unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 638         struct take_cpu_down_param tcd_param = {
 639                 .mod = mod,
 640                 .hcpu = hcpu,
 641         };
 642         cpumask_var_t cpumask;
 643         cpumask_var_t cpumask_org;
 644
 645         if (num_online_cpus() == 1)
 646                 return -EBUSY;
 647
 648         if (!cpu_online(cpu))
 649                 return -EINVAL;
 650
 651         /* Move the downtaker off the unplug cpu */
 652         if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
 653                 return -ENOMEM;
 654         if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL))  {
 655                 free_cpumask_var(cpumask);
 656                 return -ENOMEM;
 657         }
 658
 659         cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
 660         cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
 661         set_cpus_allowed_ptr(current, cpumask);
 662         free_cpumask_var(cpumask);
 663         migrate_disable();
 664         mycpu = smp_processor_id();
 665         if (mycpu == cpu) {
 666                 printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
 667                 migrate_enable();
 668                 err = -EBUSY;
 669                 goto restore_cpus;
 670         }
 671         migrate_enable();
 672
 673         cpu_hotplug_begin();
 674         err = cpu_unplug_begin(cpu);
 675         if (err) {
 676                 printk("cpu_unplug_begin(%d) failed\n", cpu);
 677                 goto out_cancel;
 678         }
 679
 680         err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
 681         if (err) {
 682                 nr_calls--;
 683                 __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
 684                 pr_warn("%s: attempt to take down CPU %u failed\n",
 685                         __func__, cpu);
 686                 goto out_release;
 687         }
 688
 689         /*
 690          * By now we've cleared cpu_active_mask, wait for all preempt-disabled
 691          * and RCU users of this state to go away such that all new such users
 692          * will observe it.
 693          *
 694          * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
 695          * not imply sync_sched(), so explicitly call both.
 696          *
 697          * Do sync before park smpboot threads to take care the rcu boost case.
 698          */
 699 #ifdef CONFIG_PREEMPT
 700         synchronize_sched();
 701 #endif
 702         synchronize_rcu();
 703
 704         __cpu_unplug_wait(cpu);
 705         smpboot_park_threads(cpu);
 706
 707         /* Notifiers are done. Don't let any more tasks pin this CPU. */
 708         cpu_unplug_sync(cpu);
 709
 710         /*
 711          * So now all preempt/rcu users must observe !cpu_active().
 712          */
 713
 714         err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 715         if (err) {
 716                 /* CPU didn't die: tell everyone.  Can't complain. */
 717                 smpboot_unpark_threads(cpu);
 718                 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
 719                 goto out_release;
 720         }
 721         BUG_ON(cpu_online(cpu));
 722
 723         /*
 724          * The migration_call() CPU_DYING callback will have removed all
 725          * runnable tasks from the cpu, there's only the idle task left now
 726          * that the migration thread is done doing the stop_machine thing.
 727          *
 728          * Wait for the stop thread to go away.
 729          */
 730         while (!per_cpu(cpu_dead_idle, cpu))
 731                 cpu_relax();
 732         smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */
 733         per_cpu(cpu_dead_idle, cpu) = false;
 734
 735         hotplug_cpu__broadcast_tick_pull(cpu);
 736         /* This actually kills the CPU. */
 737         __cpu_die(cpu);
 738
 739         /* CPU is completely dead: tell everyone.  Too late to complain. */
 740         tick_cleanup_dead_cpu(cpu);
 741         cpu_notify_nofail(CPU_DEAD | mod, hcpu);
 742
 743         check_for_tasks(cpu);
 744
 745 out_release:
 746         cpu_unplug_done(cpu);
 747 out_cancel:
 748         cpu_hotplug_done();
 749         if (!err)
 750                 cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
 751 restore_cpus:
 752         set_cpus_allowed_ptr(current, cpumask_org);
 753         free_cpumask_var(cpumask_org);
 754         return err;
 755 }
 756
     /*
      * cpu_down - take @cpu offline (tasks not frozen).
      *
      * Runs _cpu_down() under cpu_add_remove_lock. Returns -EBUSY without
      * attempting anything when CPU hotplug is currently disabled, otherwise
      * the result of _cpu_down().
      */
 757 int __ref cpu_down(unsigned int cpu)
 758 {
 759         int ret = -EBUSY;
 760
 761         cpu_maps_update_begin();
 762         /* The flag may only be sampled while the lock is held. */
 763         if (!cpu_hotplug_disabled)
 764                 ret = _cpu_down(cpu, 0);
 765         cpu_maps_update_done();
 766
 767         return ret;
 768 }
 774 EXPORT_SYMBOL(cpu_down);
 775 #endif /*CONFIG_HOTPLUG_CPU*/
 776
 777 /*
 778  * CPU notifier callback: once a CPU has come online, unpark its per-CPU
 779  * smpboot kthreads. Every other event is ignored.
 780  *
 781  * The CPU_TASKS_FROZEN modifier is masked off before the event is examined,
 782  * so the frozen and non-frozen flavours are treated alike. @hcpu carries the
 783  * CPU number; @nfb is unused. Always returns NOTIFY_OK.
 784  */
 785 static int smpboot_thread_call(struct notifier_block *nfb,
 786                                unsigned long action, void *hcpu)
 787 {
 788         int target = (long)hcpu;
 789         unsigned long event = action & ~CPU_TASKS_FROZEN;
 790
 791         if (event == CPU_ONLINE)
 792                 smpboot_unpark_threads(target);
 793
 794         return NOTIFY_OK;
 796 }
 797
     /* Notifier block hooking smpboot_thread_call() in at CPU_PRI_SMPBOOT priority. */
 798 static struct notifier_block smpboot_thread_notifier = {
 799         .notifier_call = smpboot_thread_call,
 800         .priority = CPU_PRI_SMPBOOT,
 801 };
 802
     /* Register smpboot_thread_notifier on the CPU notifier chain. */
 803 void __cpuinit smpboot_thread_init(void)
 804 {
 805         register_cpu_notifier(&smpboot_thread_notifier);
 806 }
 807
 808 /* Requires cpu_add_remove_lock to be held */
     /*
      * _cpu_up - bring @cpu online.
      * @cpu:          CPU to bring up
      * @tasks_frozen: non-zero when called with tasks frozen; adds
      *                CPU_TASKS_FROZEN to every notifier event
      *
      * Runs inside a hotplug write section. Returns 0 on success; -EINVAL if
      * @cpu is already online or not present; otherwise the error from
      * idle_thread_get(), smpboot_create_threads(), a CPU_UP_PREPARE callback
      * or __cpu_up(). When a failure happens after CPU_UP_PREPARE was sent,
      * CPU_UP_CANCELED is delivered.
      */
 809 static int _cpu_up(unsigned int cpu, int tasks_frozen)
 810 {
 811         int ret, nr_calls = 0;
 812         void *hcpu = (void *)(long)cpu;
 813         unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 814         struct task_struct *idle;
 815
 816         cpu_hotplug_begin();
 817
 818         if (cpu_online(cpu) || !cpu_present(cpu)) {
 819                 ret = -EINVAL;
 820                 goto out;
 821         }
 822
 823         idle = idle_thread_get(cpu);
 824         if (IS_ERR(idle)) {
 825                 ret = PTR_ERR(idle);
 826                 goto out;
 827         }
 828
 829         ret = smpboot_create_threads(cpu);
 830         if (ret)
 831                 goto out;
 832
 833         ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
 834         if (ret) {
                     /* Cancel only the callbacks that ran before the failing one. */
 835                 nr_calls--;
 836                 pr_warn("%s: attempt to bring up CPU %u failed\n",
 837                         __func__, cpu);
 838                 goto out_notify;
 839         }
 840
 841         /* Arch-specific enabling code. */
 842         ret = __cpu_up(cpu, idle);
 843         if (ret != 0)
 844                 goto out_notify;
 845         BUG_ON(!cpu_online(cpu));
 846
 847         /* Now call notifier in preparation. */
 848         cpu_notify(CPU_ONLINE | mod, hcpu);
 849
 850 out_notify:
             /*
              * nr_calls is either the decremented count from a failed
              * CPU_UP_PREPARE or, after an __cpu_up() failure, the full count.
              */
 851         if (ret != 0)
 852                 __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
 853 out:
 854         cpu_hotplug_done();
 855
 856         return ret;
 857 }
 858
     /*
      * cpu_up - bring @cpu online (tasks not frozen).
      *
      * Returns -EINVAL if @cpu is not a possible CPU, the error from
      * try_online_node() if the CPU's node cannot be onlined, -EBUSY if CPU
      * hotplug is currently disabled, otherwise the result of _cpu_up().
      */
 859 int cpu_up(unsigned int cpu)
 860 {
 861         int err = 0;
 862
 863         if (!cpu_possible(cpu)) {
 864                 pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
 865                        cpu);
 866 #if defined(CONFIG_IA64)
 867                 pr_err("please check additional_cpus= boot parameter\n");
 868 #endif
 869                 return -EINVAL;
 870         }
 871
             /* Done before cpu_add_remove_lock is taken. */
 872         err = try_online_node(cpu_to_node(cpu));
 873         if (err)
 874                 return err;
 875
 876         cpu_maps_update_begin();
 877
 878         if (cpu_hotplug_disabled) {
 879                 err = -EBUSY;
 880                 goto out;
 881         }
 882
 883         err = _cpu_up(cpu, 0);
 884
 885 out:
 886         cpu_maps_update_done();
 887         return err;
 888 }
 889 EXPORT_SYMBOL_GPL(cpu_up);
 890
 891 #ifdef CONFIG_PM_SLEEP_SMP
 892 static cpumask_var_t frozen_cpus;  /* CPUs offlined by disable_nonboot_cpus(), to be re-onlined by enable_nonboot_cpus() */
 893
     /*
      * disable_nonboot_cpus - offline every online CPU except the first one.
      *
      * Each CPU that was taken down successfully is recorded in frozen_cpus.
      * The walk stops at the first failure. On full success CPU hotplug is
      * additionally disabled, so nobody else can bring the CPUs back.
      *
      * Returns 0 on success or the error of the first _cpu_down() that failed.
      */
 894 int disable_nonboot_cpus(void)
 895 {
 896         int cpu, first_cpu, error = 0;
 897
 898         cpu_maps_update_begin();
 899         first_cpu = cpumask_first(cpu_online_mask);
 900         /*
 901          * All non-boot CPUs go down within one critical section so that
 902          * userspace cannot race with us through the CPU hotplug interface.
 903          */
 904         cpumask_clear(frozen_cpus);
 905
 906         pr_info("Disabling non-boot CPUs ...\n");
 907         for_each_online_cpu(cpu) {
 908                 if (cpu == first_cpu)
 909                         continue;
 910                 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
 911                 error = _cpu_down(cpu, 1);
 912                 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
 913                 if (error) {
 914                         pr_err("Error taking CPU%d down: %d\n", cpu, error);
 915                         break;
 916                 }
 917                 cpumask_set_cpu(cpu, frozen_cpus);
 918         }
 919
 920         if (error) {
 921                 pr_err("Non-boot CPUs are not disabled\n");
 922         } else {
 923                 BUG_ON(num_online_cpus() > 1);
 924                 /* Keep anybody else from onlining CPUs behind our back. */
 925                 cpu_hotplug_disabled = 1;
 926         }
 928         cpu_maps_update_done();
 929         return error;
 930 }
 931
 932 void __weak arch_enable_nonboot_cpus_begin(void)
 933 {
 934 }
 935
 936 void __weak arch_enable_nonboot_cpus_end(void)
 937 {
 938 }
 939
 940 void __ref enable_nonboot_cpus(void)
 941 {
 942         int cpu, error;
 943
 944         /* Allow everyone to use the CPU hotplug again */
 945         cpu_maps_update_begin();
 946         cpu_hotplug_disabled = 0;
 947         if (cpumask_empty(frozen_cpus))
 948                 goto out;
 949
 950         pr_info("Enabling non-boot CPUs ...\n");
 951
 952         arch_enable_nonboot_cpus_begin();
 953
 954         for_each_cpu(cpu, frozen_cpus) {
 955                 trace_suspend_resume(TPS("CPU_ON"), cpu, true);
 956                 error = _cpu_up(cpu, 1);
 957                 trace_suspend_resume(TPS("CPU_ON"), cpu, false);
 958                 if (!error) {
 959                         pr_info("CPU%d is up\n", cpu);
 960                         continue;
 961                 }
 962                 pr_warn("Error taking CPU%d up: %d\n", cpu, error);
 963         }
 964
 965         arch_enable_nonboot_cpus_end();
 966
 967         cpumask_clear(frozen_cpus);
 968 out:
 969         cpu_maps_update_done();
 970 }
 971
 972 static int __init alloc_frozen_cpus(void)
 973 {
 974         if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
 975                 return -ENOMEM;
 976         return 0;
 977 }
 978 core_initcall(alloc_frozen_cpus);
 979
 980 /*
 981  * When callbacks for CPU hotplug notifications are being executed, we must
 982  * ensure that the state of the system with respect to the tasks being frozen
 983  * or not, as reported by the notification, remains unchanged *throughout the
 984  * duration* of the execution of the callbacks.
 985  * Hence we need to prevent the freezer from racing with regular CPU hotplug.
 986  *
 987  * This synchronization is implemented by mutually excluding regular CPU
 988  * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
 989  * Hibernate notifications.
 990  */
 991 static int
 992 cpu_hotplug_pm_callback(struct notifier_block *nb,
 993                         unsigned long action, void *ptr)
 994 {
 995         switch (action) {
 996
 997         case PM_SUSPEND_PREPARE:
 998         case PM_HIBERNATION_PREPARE:
 999                 cpu_hotplug_disable();
1000                 break;
1001
1002         case PM_POST_SUSPEND:
1003         case PM_POST_HIBERNATION:
1004                 cpu_hotplug_enable();
1005                 break;
1006
1007         default:
1008                 return NOTIFY_DONE;
1009         }
1010
1011         return NOTIFY_OK;
1012 }
1013
1014
1015 static int __init cpu_hotplug_pm_sync_init(void)
1016 {
1017         /*
1018          * cpu_hotplug_pm_callback has higher priority than x86
1019          * bsp_pm_callback which depends on cpu_hotplug_pm_callback
1020          * to disable cpu hotplug to avoid cpu hotplug race.
1021          */
1022         pm_notifier(cpu_hotplug_pm_callback, 0);
1023         return 0;
1024 }
1025 core_initcall(cpu_hotplug_pm_sync_init);
1026
1027 #endif /* CONFIG_PM_SLEEP_SMP */
1028
1029 /**
1030  * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
1031  * @cpu: cpu that just started
1032  *
1033  * This function calls the cpu_chain notifiers with CPU_STARTING.
1034  * It must be called by the arch code on the new cpu, before the new cpu
1035  * enables interrupts and before the "boot" cpu returns from __cpu_up().
1036  */
1037 void notify_cpu_starting(unsigned int cpu)
1038 {
1039         unsigned long val = CPU_STARTING;
1040
1041 #ifdef CONFIG_PM_SLEEP_SMP
1042         if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
1043                 val = CPU_STARTING_FROZEN;
1044 #endif /* CONFIG_PM_SLEEP_SMP */
1045         cpu_notify(val, (void *)(long)cpu);
1046 }
1047
1048 #endif /* CONFIG_SMP */
1049
1050 /*
1051  * cpu_bit_bitmap[] is a special, "compressed" data structure that
1052  * represents all NR_CPUS bits binary values of 1<<nr.
1053  *
1054  * It is used by cpumask_of() to get a constant address to a CPU
1055  * mask value that has a single bit set only.
1056  */
1057
1058 /* cpu_bit_bitmap[0] is empty - so we can back into it */
1059 #define MASK_DECLARE_1(x)       [x+1][0] = (1UL << (x))
1060 #define MASK_DECLARE_2(x)       MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
1061 #define MASK_DECLARE_4(x)       MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
1062 #define MASK_DECLARE_8(x)       MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
1063
1064 const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
1065
1066         MASK_DECLARE_8(0),      MASK_DECLARE_8(8),
1067         MASK_DECLARE_8(16),     MASK_DECLARE_8(24),
1068 #if BITS_PER_LONG > 32
1069         MASK_DECLARE_8(32),     MASK_DECLARE_8(40),
1070         MASK_DECLARE_8(48),     MASK_DECLARE_8(56),
1071 #endif
1072 };
1073 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
1074
1075 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
1076 EXPORT_SYMBOL(cpu_all_bits);
1077
1078 #ifdef CONFIG_INIT_ALL_POSSIBLE
1079 static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
1080         = CPU_BITS_ALL;
1081 #else
1082 static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
1083 #endif
1084 const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
1085 EXPORT_SYMBOL(cpu_possible_mask);
1086
1087 static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
1088 const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
1089 EXPORT_SYMBOL(cpu_online_mask);
1090
1091 static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
1092 const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
1093 EXPORT_SYMBOL(cpu_present_mask);
1094
1095 static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
1096 const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
1097 EXPORT_SYMBOL(cpu_active_mask);
1098
1099 void set_cpu_possible(unsigned int cpu, bool possible)
1100 {
1101         if (possible)
1102                 cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
1103         else
1104                 cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
1105 }
1106
1107 void set_cpu_present(unsigned int cpu, bool present)
1108 {
1109         if (present)
1110                 cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
1111         else
1112                 cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
1113 }
1114
1115 void set_cpu_online(unsigned int cpu, bool online)
1116 {
1117         if (online) {
1118                 cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
1119                 cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
1120         } else {
1121                 cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
1122         }
1123 }
1124
1125 void set_cpu_active(unsigned int cpu, bool active)
1126 {
1127         if (active)
1128                 cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
1129         else
1130                 cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
1131 }
1132
1133 void init_cpu_present(const struct cpumask *src)
1134 {
1135         cpumask_copy(to_cpumask(cpu_present_bits), src);
1136 }
1137
1138 void init_cpu_possible(const struct cpumask *src)
1139 {
1140         cpumask_copy(to_cpumask(cpu_possible_bits), src);
1141 }
1142
1143 void init_cpu_online(const struct cpumask *src)
1144 {
1145         cpumask_copy(to_cpumask(cpu_online_bits), src);
1146 }