kernel/arch/metag/kernel/smp.c

   1 /*
   2  *  Copyright (C) 2009,2010,2011 Imagination Technologies Ltd.
   3  *
   4  *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License version 2 as
   8  * published by the Free Software Foundation.
   9  */
  10 #include <linux/atomic.h>
  11 #include <linux/completion.h>
  12 #include <linux/delay.h>
  13 #include <linux/init.h>
  14 #include <linux/spinlock.h>
  15 #include <linux/sched.h>
  16 #include <linux/interrupt.h>
  17 #include <linux/cache.h>
  18 #include <linux/profile.h>
  19 #include <linux/errno.h>
  20 #include <linux/mm.h>
  21 #include <linux/err.h>
  22 #include <linux/cpu.h>
  23 #include <linux/smp.h>
  24 #include <linux/seq_file.h>
  25 #include <linux/irq.h>
  26 #include <linux/bootmem.h>
  27
  28 #include <asm/cacheflush.h>
  29 #include <asm/cachepart.h>
  30 #include <asm/core_reg.h>
  31 #include <asm/cpu.h>
  32 #include <asm/global_lock.h>
  33 #include <asm/metag_mem.h>
  34 #include <asm/mmu_context.h>
  35 #include <asm/pgtable.h>
  36 #include <asm/pgalloc.h>
  37 #include <asm/processor.h>
  38 #include <asm/setup.h>
  39 #include <asm/tlbflush.h>
  40 #include <asm/hwthread.h>
  41 #include <asm/traps.h>
  42
  43 #define SYSC_DCPART(n)  (SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
  44 #define SYSC_ICPART(n)  (SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))
  45
  46 DECLARE_PER_CPU(PTBI, pTBI);
  47
  48 void *secondary_data_stack;
  49
  50 /*
  51  * structures for inter-processor calls
  52  * - A collection of single bit ipi messages.
  53  */
struct ipi_data {
        /* Taken around every read-modify-write of @bits. */
        spinlock_t lock;
        /* Incremented on each do_IPI() entry; printed by show_ipi_list(). */
        unsigned long ipi_count;
        /* Pending messages: bit N is set while message number N is queued. */
        unsigned long bits;
};
  59
  60 static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
  61         .lock   = __SPIN_LOCK_UNLOCKED(ipi_data.lock),
  62 };
  63
  64 static DEFINE_SPINLOCK(boot_lock);
  65
  66 static DECLARE_COMPLETION(cpu_running);
  67
  68 /*
  69  * "thread" is assumed to be a valid Meta hardware thread ID.
  70  */
  71 static int boot_secondary(unsigned int thread, struct task_struct *idle)
  72 {
  73         u32 val;
  74
  75         /*
  76          * set synchronisation state between this boot processor
  77          * and the secondary one
  78          */
  79         spin_lock(&boot_lock);
  80
  81         core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup);
  82         core_reg_write(TXUPC_ID, 1, thread, 0);
  83
  84         /*
  85          * Give the thread privilege (PSTAT) and clear potentially problematic
  86          * bits in the process (namely ISTAT, CBMarker, CBMarkerI, LSM_STEP).
  87          */
  88         core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT);
  89
  90         /* Clear the minim enable bit. */
  91         val = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread);
  92         core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, val & ~0x80);
  93
  94         /*
  95          * set the ThreadEnable bit (0x1) in the TXENABLE register
  96          * for the specified thread - off it goes!
  97          */
  98         val = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread);
  99         core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, val | 0x1);
 100
 101         /*
 102          * now the secondary core is starting up let it run its
 103          * calibrations, then wait for it to finish
 104          */
 105         spin_unlock(&boot_lock);
 106
 107         return 0;
 108 }
 109
 110 /**
 111  * describe_cachepart_change: describe a change to cache partitions.
 112  * @thread:     Hardware thread number.
 113  * @label:      Label of cache type, e.g. "dcache" or "icache".
 114  * @sz:         Total size of the cache.
 115  * @old:        Old cache partition configuration (*CPART* register).
 116  * @new:        New cache partition configuration (*CPART* register).
 117  *
 118  * If the cache partition has changed, prints a message to the log describing
 119  * those changes.
 120  */
 121 static void describe_cachepart_change(unsigned int thread, const char *label,
 122                                       unsigned int sz, unsigned int old,
 123                                       unsigned int new)
 124 {
 125         unsigned int lor1, land1, gor1, gand1;
 126         unsigned int lor2, land2, gor2, gand2;
 127         unsigned int diff = old ^ new;
 128
 129         if (!diff)
 130                 return;
 131
 132         pr_info("Thread %d: %s partition changed:", thread, label);
 133         if (diff & (SYSC_xCPARTL_OR_BITS | SYSC_xCPARTL_AND_BITS)) {
 134                 lor1   = (old & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
 135                 lor2   = (new & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
 136                 land1  = (old & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
 137                 land2  = (new & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
 138                 pr_cont(" L:%#x+%#x->%#x+%#x",
 139                         (lor1 * sz) >> 4,
 140                         ((land1 + 1) * sz) >> 4,
 141                         (lor2 * sz) >> 4,
 142                         ((land2 + 1) * sz) >> 4);
 143         }
 144         if (diff & (SYSC_xCPARTG_OR_BITS | SYSC_xCPARTG_AND_BITS)) {
 145                 gor1   = (old & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
 146                 gor2   = (new & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
 147                 gand1  = (old & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
 148                 gand2  = (new & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
 149                 pr_cont(" G:%#x+%#x->%#x+%#x",
 150                         (gor1 * sz) >> 4,
 151                         ((gand1 + 1) * sz) >> 4,
 152                         (gor2 * sz) >> 4,
 153                         ((gand2 + 1) * sz) >> 4);
 154         }
 155         if (diff & SYSC_CWRMODE_BIT)
 156                 pr_cont(" %sWR",
 157                         (new & SYSC_CWRMODE_BIT) ? "+" : "-");
 158         if (diff & SYSC_DCPART_GCON_BIT)
 159                 pr_cont(" %sGCOn",
 160                         (new & SYSC_DCPART_GCON_BIT) ? "+" : "-");
 161         pr_cont("\n");
 162 }
 163
 164 /**
 165  * setup_smp_cache: ensure cache coherency for new SMP thread.
 166  * @thread:     New hardware thread number.
 167  *
 168  * Ensures that coherency is enabled and that the threads share the same cache
 169  * partitions.
 170  */
 171 static void setup_smp_cache(unsigned int thread)
 172 {
 173         unsigned int this_thread, lflags;
 174         unsigned int dcsz, dcpart_this, dcpart_old, dcpart_new;
 175         unsigned int icsz, icpart_old, icpart_new;
 176
 177         /*
 178          * Copy over the current thread's cache partition configuration to the
 179          * new thread so that they share cache partitions.
 180          */
 181         __global_lock2(lflags);
 182         this_thread = hard_processor_id();
 183         /* Share dcache partition */
 184         dcpart_this = metag_in32(SYSC_DCPART(this_thread));
 185         dcpart_old = metag_in32(SYSC_DCPART(thread));
 186         dcpart_new = dcpart_this;
 187 #if PAGE_OFFSET < LINGLOBAL_BASE
 188         /*
 189          * For the local data cache to be coherent the threads must also have
 190          * GCOn enabled.
 191          */
 192         dcpart_new |= SYSC_DCPART_GCON_BIT;
 193         metag_out32(dcpart_new, SYSC_DCPART(this_thread));
 194 #endif
 195         metag_out32(dcpart_new, SYSC_DCPART(thread));
 196         /* Share icache partition too */
 197         icpart_new = metag_in32(SYSC_ICPART(this_thread));
 198         icpart_old = metag_in32(SYSC_ICPART(thread));
 199         metag_out32(icpart_new, SYSC_ICPART(thread));
 200         __global_unlock2(lflags);
 201
 202         /*
 203          * Log if the cache partitions were altered so the user is aware of any
 204          * potential unintentional cache wastage.
 205          */
 206         dcsz = get_dcache_size();
 207         icsz = get_dcache_size();
 208         describe_cachepart_change(this_thread, "dcache", dcsz,
 209                                   dcpart_this, dcpart_new);
 210         describe_cachepart_change(thread, "dcache", dcsz,
 211                                   dcpart_old, dcpart_new);
 212         describe_cachepart_change(thread, "icache", icsz,
 213                                   icpart_old, icpart_new);
 214 }
 215
 216 int __cpu_up(unsigned int cpu, struct task_struct *idle)
 217 {
 218         unsigned int thread = cpu_2_hwthread_id[cpu];
 219         int ret;
 220
 221         load_pgd(swapper_pg_dir, thread);
 222
 223         flush_tlb_all();
 224
 225         setup_smp_cache(thread);
 226
 227         /*
 228          * Tell the secondary CPU where to find its idle thread's stack.
 229          */
 230         secondary_data_stack = task_stack_page(idle);
 231
 232         wmb();
 233
 234         /*
 235          * Now bring the CPU into our world.
 236          */
 237         ret = boot_secondary(thread, idle);
 238         if (ret == 0) {
 239                 /*
 240                  * CPU was successfully started, wait for it
 241                  * to come online or time out.
 242                  */
 243                 wait_for_completion_timeout(&cpu_running,
 244                                             msecs_to_jiffies(1000));
 245
 246                 if (!cpu_online(cpu))
 247                         ret = -EIO;
 248         }
 249
 250         secondary_data_stack = NULL;
 251
 252         if (ret) {
 253                 pr_crit("CPU%u: processor failed to boot\n", cpu);
 254
 255                 /*
 256                  * FIXME: We need to clean up the new idle thread. --rmk
 257                  */
 258         }
 259
 260         return ret;
 261 }
 262
 263 #ifdef CONFIG_HOTPLUG_CPU
 264
 265 /*
 266  * __cpu_disable runs on the processor to be shutdown.
 267  */
 268 int __cpu_disable(void)
 269 {
 270         unsigned int cpu = smp_processor_id();
 271
 272         /*
 273          * Take this CPU offline.  Once we clear this, we can't return,
 274          * and we must not schedule until we're ready to give up the cpu.
 275          */
 276         set_cpu_online(cpu, false);
 277
 278         /*
 279          * OK - migrate IRQs away from this CPU
 280          */
 281         migrate_irqs();
 282
 283         /*
 284          * Flush user cache and TLB mappings, and then remove this CPU
 285          * from the vm mask set of all processes.
 286          */
 287         flush_cache_all();
 288         local_flush_tlb_all();
 289
 290         clear_tasks_mm_cpumask(cpu);
 291
 292         return 0;
 293 }
 294
 295 /*
 296  * called on the thread which is asking for a CPU to be shutdown -
 297  * waits until shutdown has completed, or it is timed out.
 298  */
 299 void __cpu_die(unsigned int cpu)
 300 {
 301         if (!cpu_wait_death(cpu, 1))
 302                 pr_err("CPU%u: unable to kill\n", cpu);
 303 }
 304
/*
 * Called from the idle thread for the CPU which has been shutdown.
 *
 * Disables local interrupts, calls idle_task_exit(), reports the death via
 * cpu_report_death() (its return value is deliberately ignored) and finally
 * executes an XOR into TXENABLE.
 *
 * Note that we do not return from this function. If this cpu is
 * brought online again it will need to run secondary_startup().
 */
void cpu_die(void)
{
        local_irq_disable();
        idle_task_exit();

        (void)cpu_report_death();

        /*
         * NOTE(review): D0Re0 ^ D0Re0 is zero, so this presumably writes 0
         * to TXENABLE and thereby stops this hardware thread (compare
         * boot_secondary(), which sets bit 0 of TXENABLE to start one) -
         * confirm against the Meta instruction set documentation.
         */
        asm ("XOR       TXENABLE, D0Re0,D0Re0\n");
}
 320 #endif /* CONFIG_HOTPLUG_CPU */
 321
 322 /*
 323  * Called by both boot and secondaries to move global data into
 324  * per-processor storage.
 325  */
 326 void smp_store_cpu_info(unsigned int cpuid)
 327 {
 328         struct cpuinfo_metag *cpu_info = &per_cpu(cpu_data, cpuid);
 329
 330         cpu_info->loops_per_jiffy = loops_per_jiffy;
 331 }
 332
 333 /*
 334  * This is the secondary CPU boot entry.  We're using this CPUs
 335  * idle thread stack and the global page tables.
 336  */
 337 asmlinkage void secondary_start_kernel(void)
 338 {
 339         struct mm_struct *mm = &init_mm;
 340         unsigned int cpu = smp_processor_id();
 341
 342         /*
 343          * All kernel threads share the same mm context; grab a
 344          * reference and switch to it.
 345          */
 346         atomic_inc(&mm->mm_users);
 347         atomic_inc(&mm->mm_count);
 348         current->active_mm = mm;
 349         cpumask_set_cpu(cpu, mm_cpumask(mm));
 350         enter_lazy_tlb(mm, current);
 351         local_flush_tlb_all();
 352
 353         /*
 354          * TODO: Some day it might be useful for each Linux CPU to
 355          * have its own TBI structure. That would allow each Linux CPU
 356          * to run different interrupt handlers for the same IRQ
 357          * number.
 358          *
 359          * For now, simply copying the pointer to the boot CPU's TBI
 360          * structure is sufficient because we always want to run the
 361          * same interrupt handler whatever CPU takes the interrupt.
 362          */
 363         per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
 364
 365         if (!per_cpu(pTBI, cpu))
 366                 panic("No TBI found!");
 367
 368         per_cpu_trap_init(cpu);
 369
 370         preempt_disable();
 371
 372         setup_priv();
 373
 374         notify_cpu_starting(cpu);
 375
 376         pr_info("CPU%u (thread %u): Booted secondary processor\n",
 377                 cpu, cpu_2_hwthread_id[cpu]);
 378
 379         calibrate_delay();
 380         smp_store_cpu_info(cpu);
 381
 382         /*
 383          * OK, now it's safe to let the boot CPU continue
 384          */
 385         set_cpu_online(cpu, true);
 386         complete(&cpu_running);
 387
 388         /*
 389          * Enable local interrupts.
 390          */
 391         tbi_startup_interrupt(TBID_SIGNUM_TRT);
 392         local_irq_enable();
 393
 394         /*
 395          * OK, it's off to the idle thread for us
 396          */
 397         cpu_startup_entry(CPUHP_ONLINE);
 398 }
 399
 400 void __init smp_cpus_done(unsigned int max_cpus)
 401 {
 402         int cpu;
 403         unsigned long bogosum = 0;
 404
 405         for_each_online_cpu(cpu)
 406                 bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy;
 407
 408         pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
 409                 num_online_cpus(),
 410                 bogosum / (500000/HZ),
 411                 (bogosum / (5000/HZ)) % 100);
 412 }
 413
 414 void __init smp_prepare_cpus(unsigned int max_cpus)
 415 {
 416         unsigned int cpu = smp_processor_id();
 417
 418         init_new_context(current, &init_mm);
 419         current_thread_info()->cpu = cpu;
 420
 421         smp_store_cpu_info(cpu);
 422         init_cpu_present(cpu_possible_mask);
 423 }
 424
 425 void __init smp_prepare_boot_cpu(void)
 426 {
 427         unsigned int cpu = smp_processor_id();
 428
 429         per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
 430
 431         if (!per_cpu(pTBI, cpu))
 432                 panic("No TBI found!");
 433 }
 434
 435 static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg);
 436
 437 static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
 438 {
 439         unsigned long flags;
 440         unsigned int cpu;
 441         cpumask_t map;
 442
 443         cpumask_clear(&map);
 444         local_irq_save(flags);
 445
 446         for_each_cpu(cpu, mask) {
 447                 struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
 448
 449                 spin_lock(&ipi->lock);
 450
 451                 /*
 452                  * KICK interrupts are queued in hardware so we'll get
 453                  * multiple interrupts if we call smp_cross_call()
 454                  * multiple times for one msg. The problem is that we
 455                  * only have one bit for each message - we can't queue
 456                  * them in software.
 457                  *
 458                  * The first time through ipi_handler() we'll clear
 459                  * the msg bit, having done all the work. But when we
 460                  * return we'll get _another_ interrupt (and another,
 461                  * and another until we've handled all the queued
 462                  * KICKs). Running ipi_handler() when there's no work
 463                  * to do is bad because that's how kick handler
 464                  * chaining detects who the KICK was intended for.
 465                  * See arch/metag/kernel/kick.c for more details.
 466                  *
 467                  * So only add 'cpu' to 'map' if we haven't already
 468                  * queued a KICK interrupt for 'msg'.
 469                  */
 470                 if (!(ipi->bits & (1 << msg))) {
 471                         ipi->bits |= 1 << msg;
 472                         cpumask_set_cpu(cpu, &map);
 473                 }
 474
 475                 spin_unlock(&ipi->lock);
 476         }
 477
 478         /*
 479          * Call the platform specific cross-CPU call function.
 480          */
 481         smp_cross_call(map, msg);
 482
 483         local_irq_restore(flags);
 484 }
 485
/* Send an IPI_CALL_FUNC message to every CPU in "mask". */
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
        send_ipi_message(mask, IPI_CALL_FUNC);
}
 490
/* Send an IPI_CALL_FUNC message to the single CPU "cpu". */
void arch_send_call_function_single_ipi(int cpu)
{
        send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
}
 495
 496 void show_ipi_list(struct seq_file *p)
 497 {
 498         unsigned int cpu;
 499
 500         seq_puts(p, "IPI:");
 501
 502         for_each_present_cpu(cpu)
 503                 seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count);
 504
 505         seq_putc(p, '\n');
 506 }
 507
 508 static DEFINE_SPINLOCK(stop_lock);
 509
 510 /*
 511  * Main handler for inter-processor interrupts
 512  *
 513  * For Meta, the ipimask now only identifies a single
 514  * category of IPI (Bit 1 IPIs have been replaced by a
 515  * different mechanism):
 516  *
 517  *  Bit 0 - Inter-processor function call
 518  */
 519 static int do_IPI(void)
 520 {
 521         unsigned int cpu = smp_processor_id();
 522         struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
 523         unsigned long msgs, nextmsg;
 524         int handled = 0;
 525
 526         ipi->ipi_count++;
 527
 528         spin_lock(&ipi->lock);
 529         msgs = ipi->bits;
 530         nextmsg = msgs & -msgs;
 531         ipi->bits &= ~nextmsg;
 532         spin_unlock(&ipi->lock);
 533
 534         if (nextmsg) {
 535                 handled = 1;
 536
 537                 nextmsg = ffz(~nextmsg);
 538                 switch (nextmsg) {
 539                 case IPI_RESCHEDULE:
 540                         scheduler_ipi();
 541                         break;
 542
 543                 case IPI_CALL_FUNC:
 544                         generic_smp_call_function_interrupt();
 545                         break;
 546
 547                 default:
 548                         pr_crit("CPU%u: Unknown IPI message 0x%lx\n",
 549                                 cpu, nextmsg);
 550                         break;
 551                 }
 552         }
 553
 554         return handled;
 555 }
 556
/* Send an IPI_RESCHEDULE message to the single CPU "cpu". */
void smp_send_reschedule(int cpu)
{
        send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
}
 561
 562 static void stop_this_cpu(void *data)
 563 {
 564         unsigned int cpu = smp_processor_id();
 565
 566         if (system_state == SYSTEM_BOOTING ||
 567             system_state == SYSTEM_RUNNING) {
 568                 spin_lock(&stop_lock);
 569                 pr_crit("CPU%u: stopping\n", cpu);
 570                 dump_stack();
 571                 spin_unlock(&stop_lock);
 572         }
 573
 574         set_cpu_online(cpu, false);
 575
 576         local_irq_disable();
 577
 578         hard_processor_halt(HALT_OK);
 579 }
 580
/*
 * Ask the other CPUs to run stop_this_cpu() via smp_call_function(). The
 * final argument (wait) is 0, so this does not wait for them to finish.
 */
void smp_send_stop(void)
{
        smp_call_function(stop_this_cpu, NULL, 0);
}
 585
/*
 * Not supported here: "multiplier" is ignored and -EINVAL is always
 * returned.
 */
int setup_profiling_timer(unsigned int multiplier)
{
        return -EINVAL;
}
 593
 594 /*
 595  * We use KICKs for inter-processor interrupts.
 596  *
 597  * For every CPU in "callmap" the IPI data must already have been
 598  * stored in that CPU's "ipi_data" member prior to calling this
 599  * function.
 600  */
 601 static void kick_raise_softirq(cpumask_t callmap, unsigned int irq)
 602 {
 603         int cpu;
 604
 605         for_each_cpu(cpu, &callmap) {
 606                 unsigned int thread;
 607
 608                 thread = cpu_2_hwthread_id[cpu];
 609
 610                 BUG_ON(thread == BAD_HWTHREAD_ID);
 611
 612                 metag_out32(1, T0KICKI + (thread * TnXKICK_STRIDE));
 613         }
 614 }
 615
/*
 * KICK handler callback (installed through ipi_irq): runs do_IPI() and
 * stores its result in *handled, i.e. 1 if an IPI message was pending for
 * this CPU and 0 otherwise. State is returned unchanged; SigNum, Triggers,
 * Inst and pTBI are not used.
 */
static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers,
                   int Inst, PTBI pTBI, int *handled)
{
        *handled = do_IPI();

        return State;
}
 623
/* KICK handler descriptor; registered in smp_init_cpus(). */
static struct kick_irq_handler ipi_irq = {
        .func = ipi_handler,
};
 627
/*
 * Raise a KICK on every CPU in "callmap". "msg" is not used here: the
 * caller records the message in each target's ipi_data beforehand (see
 * send_ipi_message()).
 */
static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg)
{
        kick_raise_softirq(callmap, 1);
}
 632
 633 static inline unsigned int get_core_count(void)
 634 {
 635         int i;
 636         unsigned int ret = 0;
 637
 638         for (i = 0; i < CONFIG_NR_CPUS; i++) {
 639                 if (core_reg_read(TXUCT_ID, TXENABLE_REGNUM, i))
 640                         ret++;
 641         }
 642
 643         return ret;
 644 }
 645
 646 /*
 647  * Initialise the CPU possible map early - this describes the CPUs
 648  * which may be present or become present in the system.
 649  */
 650 void __init smp_init_cpus(void)
 651 {
 652         unsigned int i, ncores = get_core_count();
 653
 654         /* If no hwthread_map early param was set use default mapping */
 655         for (i = 0; i < NR_CPUS; i++)
 656                 if (cpu_2_hwthread_id[i] == BAD_HWTHREAD_ID) {
 657                         cpu_2_hwthread_id[i] = i;
 658                         hwthread_id_2_cpu[i] = i;
 659                 }
 660
 661         for (i = 0; i < ncores; i++)
 662                 set_cpu_possible(i, true);
 663
 664         kick_register_func(&ipi_irq);
 665 }