kernel/arch/x86/kernel/irq.c

   1 /*
   2  * Common interrupt code for 32 and 64 bit
   3  */
   4 #include <linux/cpu.h>
   5 #include <linux/interrupt.h>
   6 #include <linux/kernel_stat.h>
   7 #include <linux/of.h>
   8 #include <linux/seq_file.h>
   9 #include <linux/smp.h>
  10 #include <linux/ftrace.h>
  11 #include <linux/delay.h>
  12 #include <linux/export.h>
  13
  14 #include <asm/apic.h>
  15 #include <asm/io_apic.h>
  16 #include <asm/irq.h>
  17 #include <asm/idle.h>
  18 #include <asm/mce.h>
  19 #include <asm/hw_irq.h>
  20 #include <asm/desc.h>
  21
  22 #define CREATE_TRACE_POINTS
  23 #include <asm/trace/irq_vectors.h>
  24
/*
 * Counter reported as the "ERR" row of /proc/interrupts and as the
 * return value of arch_irq_stat().  It is only read in this file;
 * presumably it is incremented by error paths elsewhere - confirm.
 */
atomic_t irq_err_count;

/*
 * Function pointer for generic interrupt vector handling.
 *
 * When non-NULL it is invoked from __smp_x86_platform_ipi(), and the
 * per-cpu x86_platform_ipis counter is included in the /proc/interrupts
 * and /proc/stat accounting below.
 */
void (*x86_platform_ipi_callback)(void) = NULL;
  29
  30 /*
  31  * 'what should we do if we get a hw irq event on an illegal vector'.
  32  * each architecture has to answer this themselves.
  33  */
  34 void ack_bad_irq(unsigned int irq)
  35 {
  36         if (printk_ratelimit())
  37                 pr_err("unexpected IRQ trap at vector %02x\n", irq);
  38
  39         /*
  40          * Currently unexpected vectors happen only on SMP and APIC.
  41          * We _must_ ack these because every local APIC has only N
  42          * irq slots per priority level, and a 'hanging, unacked' IRQ
  43          * holds up an irq slot - in excessive cases (when multiple
  44          * unexpected vectors occur) that might lock up the APIC
  45          * completely.
  46          * But only ack when the APIC is enabled -AK
  47          */
  48         ack_APIC_irq();
  49 }
  50
/* Pointer to the per-cpu irq_stat counters belonging to cpu @x. */
#define irq_stats(x)            (&per_cpu(irq_stat, x))
  52 /*
  53  * /proc/interrupts printing for arch specific interrupts
  54  */
  55 int arch_show_interrupts(struct seq_file *p, int prec)
  56 {
  57         int j;
  58
  59         seq_printf(p, "%*s: ", prec, "NMI");
  60         for_each_online_cpu(j)
  61                 seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
  62         seq_puts(p, "  Non-maskable interrupts\n");
  63 #ifdef CONFIG_X86_LOCAL_APIC
  64         seq_printf(p, "%*s: ", prec, "LOC");
  65         for_each_online_cpu(j)
  66                 seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
  67         seq_puts(p, "  Local timer interrupts\n");
  68
  69         seq_printf(p, "%*s: ", prec, "SPU");
  70         for_each_online_cpu(j)
  71                 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
  72         seq_puts(p, "  Spurious interrupts\n");
  73         seq_printf(p, "%*s: ", prec, "PMI");
  74         for_each_online_cpu(j)
  75                 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
  76         seq_puts(p, "  Performance monitoring interrupts\n");
  77         seq_printf(p, "%*s: ", prec, "IWI");
  78         for_each_online_cpu(j)
  79                 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
  80         seq_puts(p, "  IRQ work interrupts\n");
  81         seq_printf(p, "%*s: ", prec, "RTR");
  82         for_each_online_cpu(j)
  83                 seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
  84         seq_puts(p, "  APIC ICR read retries\n");
  85 #endif
  86         if (x86_platform_ipi_callback) {
  87                 seq_printf(p, "%*s: ", prec, "PLT");
  88                 for_each_online_cpu(j)
  89                         seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
  90                 seq_puts(p, "  Platform interrupts\n");
  91         }
  92 #ifdef CONFIG_SMP
  93         seq_printf(p, "%*s: ", prec, "RES");
  94         for_each_online_cpu(j)
  95                 seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
  96         seq_puts(p, "  Rescheduling interrupts\n");
  97         seq_printf(p, "%*s: ", prec, "CAL");
  98         for_each_online_cpu(j)
  99                 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
 100                                         irq_stats(j)->irq_tlb_count);
 101         seq_puts(p, "  Function call interrupts\n");
 102         seq_printf(p, "%*s: ", prec, "TLB");
 103         for_each_online_cpu(j)
 104                 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
 105         seq_puts(p, "  TLB shootdowns\n");
 106 #endif
 107 #ifdef CONFIG_X86_THERMAL_VECTOR
 108         seq_printf(p, "%*s: ", prec, "TRM");
 109         for_each_online_cpu(j)
 110                 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
 111         seq_puts(p, "  Thermal event interrupts\n");
 112 #endif
 113 #ifdef CONFIG_X86_MCE_THRESHOLD
 114         seq_printf(p, "%*s: ", prec, "THR");
 115         for_each_online_cpu(j)
 116                 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
 117         seq_puts(p, "  Threshold APIC interrupts\n");
 118 #endif
 119 #ifdef CONFIG_X86_MCE
 120         seq_printf(p, "%*s: ", prec, "MCE");
 121         for_each_online_cpu(j)
 122                 seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
 123         seq_puts(p, "  Machine check exceptions\n");
 124         seq_printf(p, "%*s: ", prec, "MCP");
 125         for_each_online_cpu(j)
 126                 seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
 127         seq_puts(p, "  Machine check polls\n");
 128 #endif
 129 #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
 130         seq_printf(p, "%*s: ", prec, "HYP");
 131         for_each_online_cpu(j)
 132                 seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
 133         seq_puts(p, "  Hypervisor callback interrupts\n");
 134 #endif
 135         seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
 136 #if defined(CONFIG_X86_IO_APIC)
 137         seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
 138 #endif
 139         return 0;
 140 }
 141
 142 /*
 143  * /proc/stat helpers
 144  */
 145 u64 arch_irq_stat_cpu(unsigned int cpu)
 146 {
 147         u64 sum = irq_stats(cpu)->__nmi_count;
 148
 149 #ifdef CONFIG_X86_LOCAL_APIC
 150         sum += irq_stats(cpu)->apic_timer_irqs;
 151         sum += irq_stats(cpu)->irq_spurious_count;
 152         sum += irq_stats(cpu)->apic_perf_irqs;
 153         sum += irq_stats(cpu)->apic_irq_work_irqs;
 154         sum += irq_stats(cpu)->icr_read_retry_count;
 155 #endif
 156         if (x86_platform_ipi_callback)
 157                 sum += irq_stats(cpu)->x86_platform_ipis;
 158 #ifdef CONFIG_SMP
 159         sum += irq_stats(cpu)->irq_resched_count;
 160         sum += irq_stats(cpu)->irq_call_count;
 161 #endif
 162 #ifdef CONFIG_X86_THERMAL_VECTOR
 163         sum += irq_stats(cpu)->irq_thermal_count;
 164 #endif
 165 #ifdef CONFIG_X86_MCE_THRESHOLD
 166         sum += irq_stats(cpu)->irq_threshold_count;
 167 #endif
 168 #ifdef CONFIG_X86_MCE
 169         sum += per_cpu(mce_exception_count, cpu);
 170         sum += per_cpu(mce_poll_count, cpu);
 171 #endif
 172         return sum;
 173 }
 174
 175 u64 arch_irq_stat(void)
 176 {
 177         u64 sum = atomic_read(&irq_err_count);
 178         return sum;
 179 }
 180
 181
 182 /*
 183  * do_IRQ handles all normal device IRQ's (the special
 184  * SMP cross-CPU interrupts have their own specific
 185  * handlers).
 186  */
 187 __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 188 {
 189         struct pt_regs *old_regs = set_irq_regs(regs);
 190
 191         /* high bit used in ret_from_ code  */
 192         unsigned vector = ~regs->orig_ax;
 193         unsigned irq;
 194
 195         irq_enter();
 196         exit_idle();
 197
 198         irq = __this_cpu_read(vector_irq[vector]);
 199
 200         if (!handle_irq(irq, regs)) {
 201                 ack_APIC_irq();
 202
 203                 if (irq != VECTOR_RETRIGGERED) {
 204                         pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n",
 205                                              __func__, smp_processor_id(),
 206                                              vector, irq);
 207                 } else {
 208                         __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
 209                 }
 210         }
 211
 212         irq_exit();
 213
 214         set_irq_regs(old_regs);
 215         return 1;
 216 }
 217
 218 /*
 219  * Handler for X86_PLATFORM_IPI_VECTOR.
 220  */
 221 void __smp_x86_platform_ipi(void)
 222 {
 223         inc_irq_stat(x86_platform_ipis);
 224
 225         if (x86_platform_ipi_callback)
 226                 x86_platform_ipi_callback();
 227 }
 228
 229 __visible void smp_x86_platform_ipi(struct pt_regs *regs)
 230 {
 231         struct pt_regs *old_regs = set_irq_regs(regs);
 232
 233         entering_ack_irq();
 234         __smp_x86_platform_ipi();
 235         exiting_irq();
 236         set_irq_regs(old_regs);
 237 }
 238
 239 #ifdef CONFIG_HAVE_KVM
 240 /*
 241  * Handler for POSTED_INTERRUPT_VECTOR.
 242  */
 243 __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
 244 {
 245         struct pt_regs *old_regs = set_irq_regs(regs);
 246
 247         ack_APIC_irq();
 248
 249         irq_enter();
 250
 251         exit_idle();
 252
 253         inc_irq_stat(kvm_posted_intr_ipis);
 254
 255         irq_exit();
 256
 257         set_irq_regs(old_regs);
 258 }
 259 #endif
 260
 261 __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
 262 {
 263         struct pt_regs *old_regs = set_irq_regs(regs);
 264
 265         entering_ack_irq();
 266         trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
 267         __smp_x86_platform_ipi();
 268         trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
 269         exiting_irq();
 270         set_irq_regs(old_regs);
 271 }
 272
/*
 * Export vector_used_by_percpu_irq for use by GPL-licensed modules.
 * NOTE(review): no definition of this symbol is visible in this part of
 * the file - confirm where it lives.
 */
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
 274
 275 #ifdef CONFIG_HOTPLUG_CPU
 276
 277 /* These two declarations are only used in check_irq_vectors_for_cpu_disable()
 278  * below, which is protected by stop_machine().  Putting them on the stack
 279  * results in a stack frame overflow.  Dynamically allocating could result in a
 280  * failure so declare these two cpumasks as global.
 281  */
 282 static struct cpumask affinity_new, online_new;
 283
 284 /*
 285  * This cpu is going to be removed and its vectors migrated to the remaining
 286  * online cpus.  Check to see if there are enough vectors in the remaining cpus.
 287  * This function is protected by stop_machine().
 288  */
 289 int check_irq_vectors_for_cpu_disable(void)
 290 {
 291         int irq, cpu;
 292         unsigned int this_cpu, vector, this_count, count;
 293         struct irq_desc *desc;
 294         struct irq_data *data;
 295
 296         this_cpu = smp_processor_id();
 297         cpumask_copy(&online_new, cpu_online_mask);
 298         cpumask_clear_cpu(this_cpu, &online_new);
 299
 300         this_count = 0;
 301         for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
 302                 irq = __this_cpu_read(vector_irq[vector]);
 303                 if (irq >= 0) {
 304                         desc = irq_to_desc(irq);
 305                         if (!desc)
 306                                 continue;
 307
 308                         data = irq_desc_get_irq_data(desc);
 309                         cpumask_copy(&affinity_new, data->affinity);
 310                         cpumask_clear_cpu(this_cpu, &affinity_new);
 311
 312                         /* Do not count inactive or per-cpu irqs. */
 313                         if (!irq_has_action(irq) || irqd_is_per_cpu(data))
 314                                 continue;
 315
 316                         /*
 317                          * A single irq may be mapped to multiple
 318                          * cpu's vector_irq[] (for example IOAPIC cluster
 319                          * mode).  In this case we have two
 320                          * possibilities:
 321                          *
 322                          * 1) the resulting affinity mask is empty; that is
 323                          * this the down'd cpu is the last cpu in the irq's
 324                          * affinity mask, or
 325                          *
 326                          * 2) the resulting affinity mask is no longer
 327                          * a subset of the online cpus but the affinity
 328                          * mask is not zero; that is the down'd cpu is the
 329                          * last online cpu in a user set affinity mask.
 330                          */
 331                         if (cpumask_empty(&affinity_new) ||
 332                             !cpumask_subset(&affinity_new, &online_new))
 333                                 this_count++;
 334                 }
 335         }
 336
 337         count = 0;
 338         for_each_online_cpu(cpu) {
 339                 if (cpu == this_cpu)
 340                         continue;
 341                 /*
 342                  * We scan from FIRST_EXTERNAL_VECTOR to first system
 343                  * vector. If the vector is marked in the used vectors
 344                  * bitmap or an irq is assigned to it, we don't count
 345                  * it as available.
 346                  */
 347                 for (vector = FIRST_EXTERNAL_VECTOR;
 348                      vector < first_system_vector; vector++) {
 349                         if (!test_bit(vector, used_vectors) &&
 350                             per_cpu(vector_irq, cpu)[vector] < 0)
 351                                         count++;
 352                 }
 353         }
 354
 355         if (count < this_count) {
 356                 pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n",
 357                         this_cpu, this_count, count);
 358                 return -ERANGE;
 359         }
 360         return 0;
 361 }
 362
 363 /* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
 364 void fixup_irqs(void)
 365 {
 366         unsigned int irq, vector;
 367         static int warned;
 368         struct irq_desc *desc;
 369         struct irq_data *data;
 370         struct irq_chip *chip;
 371         int ret;
 372
 373         for_each_irq_desc(irq, desc) {
 374                 int break_affinity = 0;
 375                 int set_affinity = 1;
 376                 const struct cpumask *affinity;
 377
 378                 if (!desc)
 379                         continue;
 380                 if (irq == 2)
 381                         continue;
 382
 383                 /* interrupt's are disabled at this point */
 384                 raw_spin_lock(&desc->lock);
 385
 386                 data = irq_desc_get_irq_data(desc);
 387                 affinity = data->affinity;
 388                 if (!irq_has_action(irq) || irqd_is_per_cpu(data) ||
 389                     cpumask_subset(affinity, cpu_online_mask)) {
 390                         raw_spin_unlock(&desc->lock);
 391                         continue;
 392                 }
 393
 394                 /*
 395                  * Complete the irq move. This cpu is going down and for
 396                  * non intr-remapping case, we can't wait till this interrupt
 397                  * arrives at this cpu before completing the irq move.
 398                  */
 399                 irq_force_complete_move(irq);
 400
 401                 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 402                         break_affinity = 1;
 403                         affinity = cpu_online_mask;
 404                 }
 405
 406                 chip = irq_data_get_irq_chip(data);
 407                 if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
 408                         chip->irq_mask(data);
 409
 410                 if (chip->irq_set_affinity) {
 411                         ret = chip->irq_set_affinity(data, affinity, true);
 412                         if (ret == -ENOSPC)
 413                                 pr_crit("IRQ %d set affinity failed because there are no available vectors.  The device assigned to this IRQ is unstable.\n", irq);
 414                 } else {
 415                         if (!(warned++))
 416                                 set_affinity = 0;
 417                 }
 418
 419                 /*
 420                  * We unmask if the irq was not marked masked by the
 421                  * core code. That respects the lazy irq disable
 422                  * behaviour.
 423                  */
 424                 if (!irqd_can_move_in_process_context(data) &&
 425                     !irqd_irq_masked(data) && chip->irq_unmask)
 426                         chip->irq_unmask(data);
 427
 428                 raw_spin_unlock(&desc->lock);
 429
 430                 if (break_affinity && set_affinity)
 431                         pr_notice("Broke affinity for irq %i\n", irq);
 432                 else if (!set_affinity)
 433                         pr_notice("Cannot set affinity for irq %i\n", irq);
 434         }
 435
 436         /*
 437          * We can remove mdelay() and then send spuriuous interrupts to
 438          * new cpu targets for all the irqs that were handled previously by
 439          * this cpu. While it works, I have seen spurious interrupt messages
 440          * (nothing wrong but still...).
 441          *
 442          * So for now, retain mdelay(1) and check the IRR and then send those
 443          * interrupts to new targets as this cpu is already offlined...
 444          */
 445         mdelay(1);
 446
 447         for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
 448                 unsigned int irr;
 449
 450                 if (__this_cpu_read(vector_irq[vector]) <= VECTOR_UNDEFINED)
 451                         continue;
 452
 453                 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
 454                 if (irr  & (1 << (vector % 32))) {
 455                         irq = __this_cpu_read(vector_irq[vector]);
 456
 457                         desc = irq_to_desc(irq);
 458                         data = irq_desc_get_irq_data(desc);
 459                         chip = irq_data_get_irq_chip(data);
 460                         raw_spin_lock(&desc->lock);
 461                         if (chip->irq_retrigger) {
 462                                 chip->irq_retrigger(data);
 463                                 __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
 464                         }
 465                         raw_spin_unlock(&desc->lock);
 466                 }
 467                 if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED)
 468                         __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
 469         }
 470 }
 471 #endif