Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / kernel / softirq.c
1 /*
2  *      linux/kernel/softirq.c
3  *
4  *      Copyright (C) 1992 Linus Torvalds
5  *
6  *      Distribute under GPLv2.
7  *
8  *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  */
10
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13 #include <linux/export.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/delay.h>
25 #include <linux/ftrace.h>
26 #include <linux/smp.h>
27 #include <linux/smpboot.h>
28 #include <linux/tick.h>
29 #include <linux/locallock.h>
30 #include <linux/irq.h>
31
32 #define CREATE_TRACE_POINTS
33 #include <trace/events/irq.h>
34
35 /*
36    - No shared variables, all the data are CPU local.
37    - If a softirq needs serialization, let it serialize itself
38      by its own spinlocks.
39    - Even if softirq is serialized, only local cpu is marked for
40      execution. Hence, we get a sort of weak CPU binding,
41      though it is still not clear whether this results in better
42      locality or not.
43
44    Examples:
45    - NET RX softirq. It is multithreaded and does not require
46      any global serialization.
47    - NET TX softirq. It kicks software netdevice queues, hence
48      it is logically serialized per device, but this serialization
49      is invisible to common code.
50    - Tasklets: serialized wrt itself.
51  */
52
53 #ifndef __ARCH_IRQ_STAT
54 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
55 EXPORT_SYMBOL(irq_stat);
56 #endif
57
58 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
59
60 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
61
62 const char * const softirq_to_name[NR_SOFTIRQS] = {
63         "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
64         "TASKLET", "SCHED", "HRTIMER", "RCU"
65 };
66
67 #ifdef CONFIG_NO_HZ_COMMON
68 # ifdef CONFIG_PREEMPT_RT_FULL
69
70 struct softirq_runner {
71         struct task_struct *runner[NR_SOFTIRQS];
72 };
73
74 static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
75
76 static inline void softirq_set_runner(unsigned int sirq)
77 {
78         struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
79
80         sr->runner[sirq] = current;
81 }
82
83 static inline void softirq_clr_runner(unsigned int sirq)
84 {
85         struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
86
87         sr->runner[sirq] = NULL;
88 }
89
90 /*
91  * On preempt-rt a softirq running context might be blocked on a
92  * lock. There might be no other runnable task on this CPU because the
93  * lock owner runs on some other CPU. So we have to go into idle with
94  * the pending bit set. Therefore we need to check this, otherwise we
95  * warn about false positives, which confuses users and defeats the
96  * whole purpose of this test.
97  *
98  * This code is called with interrupts disabled.
99  */
100 void softirq_check_pending_idle(void)
101 {
102         static int rate_limit;
103         struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
104         u32 warnpending;
105         int i;
106
107         if (rate_limit >= 10)
108                 return;
109
110         warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
111         for (i = 0; i < NR_SOFTIRQS; i++) {
112                 struct task_struct *tsk = sr->runner[i];
113
114                 /*
115                  * The wakeup code in rtmutex.c wakes up the task
116                  * _before_ it sets pi_blocked_on to NULL under
117                  * tsk->pi_lock. So we need to check for both: state
118                  * and pi_blocked_on.
119                  */
120                 if (tsk) {
121                         raw_spin_lock(&tsk->pi_lock);
122                         if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
123                                 /* Clear all bits pending in that task */
124                                 warnpending &= ~(tsk->softirqs_raised);
125                                 warnpending &= ~(1 << i);
126                         }
127                         raw_spin_unlock(&tsk->pi_lock);
128                 }
129         }
130
131         if (warnpending) {
132                 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
133                        warnpending);
134                 rate_limit++;
135         }
136 }
137 # else
138 /*
139  * On !PREEMPT_RT we just printk rate limited:
140  */
141 void softirq_check_pending_idle(void)
142 {
143         static int rate_limit;
144
145         if (rate_limit < 10 &&
146                         (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
147                 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
148                        local_softirq_pending());
149                 rate_limit++;
150         }
151 }
152 # endif
153
154 #else /* !CONFIG_NO_HZ_COMMON */
155 static inline void softirq_set_runner(unsigned int sirq) { }
156 static inline void softirq_clr_runner(unsigned int sirq) { }
157 #endif
158
159 /*
160  * we cannot loop indefinitely here to avoid userspace starvation,
161  * but we also don't want to introduce a worst case 1/HZ latency
162  * to the pending events, so let the scheduler balance
163  * the softirq load for us.
164  */
165 static void wakeup_softirqd(void)
166 {
167         /* Interrupts are disabled: no need to stop preemption */
168         struct task_struct *tsk = __this_cpu_read(ksoftirqd);
169
170         if (tsk && tsk->state != TASK_RUNNING)
171                 wake_up_process(tsk);
172 }
173
174 static void handle_softirq(unsigned int vec_nr)
175 {
176         struct softirq_action *h = softirq_vec + vec_nr;
177         int prev_count;
178
179         prev_count = preempt_count();
180
181         kstat_incr_softirqs_this_cpu(vec_nr);
182
183         trace_softirq_entry(vec_nr);
184         h->action(h);
185         trace_softirq_exit(vec_nr);
186         if (unlikely(prev_count != preempt_count())) {
187                 pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
188                        vec_nr, softirq_to_name[vec_nr], h->action,
189                        prev_count, preempt_count());
190                 preempt_count_set(prev_count);
191         }
192 }
193
194 #ifndef CONFIG_PREEMPT_RT_FULL
195 static inline int ksoftirqd_softirq_pending(void)
196 {
197         return local_softirq_pending();
198 }
199
200 static void handle_pending_softirqs(u32 pending)
201 {
202         struct softirq_action *h = softirq_vec;
203         int softirq_bit;
204
205         local_irq_enable();
206
207         h = softirq_vec;
208
209         while ((softirq_bit = ffs(pending))) {
210                 unsigned int vec_nr;
211
212                 h += softirq_bit - 1;
213                 vec_nr = h - softirq_vec;
214                 handle_softirq(vec_nr);
215
216                 h++;
217                 pending >>= softirq_bit;
218         }
219
220         rcu_bh_qs();
221         local_irq_disable();
222 }
223
224 static void run_ksoftirqd(unsigned int cpu)
225 {
226         local_irq_disable();
227         if (ksoftirqd_softirq_pending()) {
228                 __do_softirq();
229                 local_irq_enable();
230                 cond_resched_rcu_qs();
231                 return;
232         }
233         local_irq_enable();
234 }
235
236 /*
237  * preempt_count and SOFTIRQ_OFFSET usage:
238  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
239  *   softirq processing.
240  * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
241  *   on local_bh_disable or local_bh_enable.
242  * This lets us distinguish between whether we are currently processing
243  * softirq and whether we just have bh disabled.
244  */
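/*
 * Editor's note, an illustrative sketch rather than part of the original
 * file: because local_bh_disable() adds SOFTIRQ_DISABLE_OFFSET while
 * running a softirq adds only SOFTIRQ_OFFSET, the standard helpers from
 * preempt_mask.h can tell the two states apart, roughly:
 *
 *	local_bh_disable();
 *	WARN_ON(!in_softirq());		softirq_count() is non-zero here
 *	WARN_ON(in_serving_softirq());	but no handler is actually running
 *	local_bh_enable();
 */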
245
246 /*
247  * This one is for softirq.c-internal use,
248  * where hardirqs are disabled legitimately:
249  */
250 #ifdef CONFIG_TRACE_IRQFLAGS
251 void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
252 {
253         unsigned long flags;
254
255         WARN_ON_ONCE(in_irq());
256
257         raw_local_irq_save(flags);
258         /*
259          * The preempt tracer hooks into preempt_count_add and will break
260          * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
261          * is set and before current->softirq_enabled is cleared.
262          * We must manually increment preempt_count here and manually
263          * call the trace_preempt_off later.
264          */
265         __preempt_count_add(cnt);
266         /*
267          * Were softirqs turned off above:
268          */
269         if (softirq_count() == (cnt & SOFTIRQ_MASK))
270                 trace_softirqs_off(ip);
271         raw_local_irq_restore(flags);
272
273         if (preempt_count() == cnt) {
274 #ifdef CONFIG_DEBUG_PREEMPT
275                 current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
276 #endif
277                 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
278         }
279 }
280 EXPORT_SYMBOL(__local_bh_disable_ip);
281 #endif /* CONFIG_TRACE_IRQFLAGS */
282
283 static void __local_bh_enable(unsigned int cnt)
284 {
285         WARN_ON_ONCE(!irqs_disabled());
286
287         if (softirq_count() == (cnt & SOFTIRQ_MASK))
288                 trace_softirqs_on(_RET_IP_);
289         preempt_count_sub(cnt);
290 }
291
292 /*
293  * Special-case - softirqs can safely be enabled in
294  * cond_resched_softirq(), or by __do_softirq(),
295  * without processing still-pending softirqs:
296  */
297 void _local_bh_enable(void)
298 {
299         WARN_ON_ONCE(in_irq());
300         __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
301 }
302 EXPORT_SYMBOL(_local_bh_enable);
303
304 void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
305 {
306         WARN_ON_ONCE(in_irq() || irqs_disabled());
307 #ifdef CONFIG_TRACE_IRQFLAGS
308         local_irq_disable();
309 #endif
310         /*
311          * Are softirqs going to be turned on now:
312          */
313         if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
314                 trace_softirqs_on(ip);
315         /*
316          * Keep preemption disabled until we are done with
317          * softirq processing:
318          */
319         preempt_count_sub(cnt - 1);
320
321         if (unlikely(!in_interrupt() && local_softirq_pending())) {
322                 /*
323                  * Run softirq if any is pending, and do it on its own stack
324                  * as we may be calling this deep in a task call stack already.
325                  */
326                 do_softirq();
327         }
328
329         preempt_count_dec();
330 #ifdef CONFIG_TRACE_IRQFLAGS
331         local_irq_enable();
332 #endif
333         preempt_check_resched();
334 }
335 EXPORT_SYMBOL(__local_bh_enable_ip);
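/*
 * Editor's note, a minimal usage sketch (not part of the original file;
 * the per-CPU counter name is made up): a typical caller brackets data
 * it shares with a softirq handler, and it is the final local_bh_enable()
 * that ends up running any softirq raised in between:
 *
 *	local_bh_disable();
 *	__this_cpu_inc(my_drv_rx_count);	hypothetical per-CPU counter
 *	local_bh_enable();			may end up in do_softirq()
 */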
336
337 /*
338  * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
339  * but break the loop if need_resched() is set or after 2 ms.
340  * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
341  * certain cases, such as stop_machine(), jiffies may cease to
342  * increment and so we need the MAX_SOFTIRQ_RESTART limit as
343  * well to make sure we eventually return from this method.
344  *
345  * These limits have been established via experimentation.
346  * The two things to balance are latency and fairness -
347  * we want to handle softirqs as soon as possible, but they
348  * should not be able to lock up the box.
349  */
350 #define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
351 #define MAX_SOFTIRQ_RESTART 10
352
353 #ifdef CONFIG_TRACE_IRQFLAGS
354 /*
355  * When we run softirqs from irq_exit() and thus on the hardirq stack we need
356  * to keep the lockdep irq context tracking as tight as possible in order to
357  * not mis-qualify lock contexts and miss possible deadlocks.
358  */
359
360 static inline bool lockdep_softirq_start(void)
361 {
362         bool in_hardirq = false;
363
364         if (trace_hardirq_context(current)) {
365                 in_hardirq = true;
366                 trace_hardirq_exit();
367         }
368
369         lockdep_softirq_enter();
370
371         return in_hardirq;
372 }
373
374 static inline void lockdep_softirq_end(bool in_hardirq)
375 {
376         lockdep_softirq_exit();
377
378         if (in_hardirq)
379                 trace_hardirq_enter();
380 }
381 #else
382 static inline bool lockdep_softirq_start(void) { return false; }
383 static inline void lockdep_softirq_end(bool in_hardirq) { }
384 #endif
385
386 asmlinkage __visible void __do_softirq(void)
387 {
388         unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
389         unsigned long old_flags = current->flags;
390         int max_restart = MAX_SOFTIRQ_RESTART;
391         bool in_hardirq;
392         __u32 pending;
393
394         /*
395          * Mask out PF_MEMALLOC as the current task context is borrowed for the
396          * softirq. A softirq handler such as network RX might set PF_MEMALLOC
397          * again if the socket is related to swap.
398          */
399         current->flags &= ~PF_MEMALLOC;
400
401         pending = local_softirq_pending();
402         account_irq_enter_time(current);
403
404         __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
405         in_hardirq = lockdep_softirq_start();
406
407 restart:
408         /* Reset the pending bitmask before enabling irqs */
409         set_softirq_pending(0);
410
411         handle_pending_softirqs(pending);
412
413         pending = local_softirq_pending();
414         if (pending) {
415                 if (time_before(jiffies, end) && !need_resched() &&
416                     --max_restart)
417                         goto restart;
418
419                 wakeup_softirqd();
420         }
421
422         lockdep_softirq_end(in_hardirq);
423         account_irq_exit_time(current);
424         __local_bh_enable(SOFTIRQ_OFFSET);
425         WARN_ON_ONCE(in_interrupt());
426         tsk_restore_flags(current, old_flags, PF_MEMALLOC);
427 }
428
429 asmlinkage __visible void do_softirq(void)
430 {
431         __u32 pending;
432         unsigned long flags;
433
434         if (in_interrupt())
435                 return;
436
437         local_irq_save(flags);
438
439         pending = local_softirq_pending();
440
441         if (pending)
442                 do_softirq_own_stack();
443
444         local_irq_restore(flags);
445 }
446
447 /*
448  * This function must run with irqs disabled!
449  */
450 void raise_softirq_irqoff(unsigned int nr)
451 {
452         __raise_softirq_irqoff(nr);
453
454         /*
455          * If we're in an interrupt or softirq, we're done
456          * (this also catches softirq-disabled code). We will
457          * actually run the softirq once we return from
458          * the irq or softirq.
459          *
460          * Otherwise we wake up ksoftirqd to make sure we
461          * schedule the softirq soon.
462          */
463         if (!in_interrupt())
464                 wakeup_softirqd();
465 }
466
467 void __raise_softirq_irqoff(unsigned int nr)
468 {
469         trace_softirq_raise(nr);
470         or_softirq_pending(1UL << nr);
471 }
472
473 static inline void local_bh_disable_nort(void) { local_bh_disable(); }
474 static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
475 static void ksoftirqd_set_sched_params(unsigned int cpu) { }
476 static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online) { }
477
478 #else /* !PREEMPT_RT_FULL */
479
480 /*
481  * On RT we serialize softirq execution with a cpu local lock per softirq
482  */
483 static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
484
485 void __init softirq_early_init(void)
486 {
487         int i;
488
489         for (i = 0; i < NR_SOFTIRQS; i++)
490                 local_irq_lock_init(local_softirq_locks[i]);
491 }
492
493 static void lock_softirq(int which)
494 {
495         local_lock(local_softirq_locks[which]);
496 }
497
498 static void unlock_softirq(int which)
499 {
500         local_unlock(local_softirq_locks[which]);
501 }
502
503 static void do_single_softirq(int which)
504 {
505         unsigned long old_flags = current->flags;
506
507         current->flags &= ~PF_MEMALLOC;
508         vtime_account_irq_enter(current);
509         current->flags |= PF_IN_SOFTIRQ;
510         lockdep_softirq_enter();
511         local_irq_enable();
512         handle_softirq(which);
513         local_irq_disable();
514         lockdep_softirq_exit();
515         current->flags &= ~PF_IN_SOFTIRQ;
516         vtime_account_irq_enter(current);
517         tsk_restore_flags(current, old_flags, PF_MEMALLOC);
518 }
519
520 /*
521  * Called with interrupts disabled. Process softirqs which were raised
522  * in current context (or on behalf of ksoftirqd).
523  */
524 static void do_current_softirqs(void)
525 {
526         while (current->softirqs_raised) {
527                 int i = __ffs(current->softirqs_raised);
528                 unsigned int pending, mask = (1U << i);
529
530                 current->softirqs_raised &= ~mask;
531                 local_irq_enable();
532
533                 /*
534                  * If the lock is contended, we boost the owner to
535                  * process the softirq or leave the critical section
536                  * now.
537                  */
538                 lock_softirq(i);
539                 local_irq_disable();
540                 softirq_set_runner(i);
541                 /*
542                  * Check the local_softirq_pending() bits to see
543                  * whether we still need to process this or if someone
544                  * else took care of it.
545                  */
546                 pending = local_softirq_pending();
547                 if (pending & mask) {
548                         set_softirq_pending(pending & ~mask);
549                         do_single_softirq(i);
550                 }
551                 softirq_clr_runner(i);
552                 unlock_softirq(i);
553                 WARN_ON(current->softirq_nestcnt != 1);
554         }
555 }
556
557 static void __local_bh_disable(void)
558 {
559         if (++current->softirq_nestcnt == 1)
560                 migrate_disable();
561 }
562
563 void local_bh_disable(void)
564 {
565         __local_bh_disable();
566 }
567 EXPORT_SYMBOL(local_bh_disable);
568
569 void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
570 {
571         __local_bh_disable();
572         if (cnt & PREEMPT_CHECK_OFFSET)
573                 preempt_disable();
574 }
575
576 static void __local_bh_enable(void)
577 {
578         if (WARN_ON(current->softirq_nestcnt == 0))
579                 return;
580
581         local_irq_disable();
582         if (current->softirq_nestcnt == 1 && current->softirqs_raised)
583                 do_current_softirqs();
584         local_irq_enable();
585
586         if (--current->softirq_nestcnt == 0)
587                 migrate_enable();
588 }
589
590 void local_bh_enable(void)
591 {
592         __local_bh_enable();
593 }
594 EXPORT_SYMBOL(local_bh_enable);
595
596 extern void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
597 {
598         __local_bh_enable();
599         if (cnt & PREEMPT_CHECK_OFFSET)
600                 preempt_enable();
601 }
602
603 void local_bh_enable_ip(unsigned long ip)
604 {
605         local_bh_enable();
606 }
607 EXPORT_SYMBOL(local_bh_enable_ip);
608
609 void _local_bh_enable(void)
610 {
611         if (WARN_ON(current->softirq_nestcnt == 0))
612                 return;
613         if (--current->softirq_nestcnt == 0)
614                 migrate_enable();
615 }
616 EXPORT_SYMBOL(_local_bh_enable);
617
618 int in_serving_softirq(void)
619 {
620         return current->flags & PF_IN_SOFTIRQ;
621 }
622 EXPORT_SYMBOL(in_serving_softirq);
623
624 /* Called with preemption disabled */
625 static void run_ksoftirqd(unsigned int cpu)
626 {
627         local_irq_disable();
628         current->softirq_nestcnt++;
629
630         do_current_softirqs();
631         current->softirq_nestcnt--;
632         rcu_note_context_switch();
633         local_irq_enable();
634 }
635
636 /*
637  * Called from netif_rx_ni(). Preemption enabled, but migration
638  * disabled. So the cpu can't go away under us.
639  */
640 void thread_do_softirq(void)
641 {
642         if (!in_serving_softirq() && current->softirqs_raised) {
643                 current->softirq_nestcnt++;
644                 do_current_softirqs();
645                 current->softirq_nestcnt--;
646         }
647 }
648
649 static void do_raise_softirq_irqoff(unsigned int nr)
650 {
651         trace_softirq_raise(nr);
652         or_softirq_pending(1UL << nr);
653
654         /*
655          * If we are not in a hard interrupt and inside a bh disabled
656          * region, we simply raise the flag on current. local_bh_enable()
657          * will make sure that the softirq is executed. Otherwise we
658          * delegate it to ksoftirqd.
659          */
660         if (!in_irq() && current->softirq_nestcnt)
661                 current->softirqs_raised |= (1U << nr);
662         else if (__this_cpu_read(ksoftirqd))
663                 __this_cpu_read(ksoftirqd)->softirqs_raised |= (1U << nr);
664 }
665
666 void __raise_softirq_irqoff(unsigned int nr)
667 {
668         do_raise_softirq_irqoff(nr);
669         if (!in_irq() && !current->softirq_nestcnt)
670                 wakeup_softirqd();
671 }
672
673 /*
674  * This function must run with irqs disabled!
675  */
676 void raise_softirq_irqoff(unsigned int nr)
677 {
678         do_raise_softirq_irqoff(nr);
679
680         /*
681          * If we're in a hard interrupt we let the irq return code deal
682          * with the wakeup of ksoftirqd.
683          */
684         if (in_irq())
685                 return;
686         /*
687          * If we are in thread context but outside of a bh disabled
688          * region, we need to wake ksoftirqd as well.
689          *
690          * CHECKME: Some of the places which do that could be wrapped
691          * into local_bh_disable/enable pairs. Though it's unclear
692          * whether this is worth the effort. To find those places just
693          * raise a WARN() if the condition is met.
694          */
695         if (!current->softirq_nestcnt)
696                 wakeup_softirqd();
697 }
698
699 static inline int ksoftirqd_softirq_pending(void)
700 {
701         return current->softirqs_raised;
702 }
703
704 static inline void local_bh_disable_nort(void) { }
705 static inline void _local_bh_enable_nort(void) { }
706
707 static inline void ksoftirqd_set_sched_params(unsigned int cpu)
708 {
709         struct sched_param param = { .sched_priority = 1 };
710
711         sched_setscheduler(current, SCHED_FIFO, &param);
712         /* Take over all pending softirqs when starting */
713         local_irq_disable();
714         current->softirqs_raised = local_softirq_pending();
715         local_irq_enable();
716 }
717
718 static inline void ksoftirqd_clr_sched_params(unsigned int cpu, bool online)
719 {
720         struct sched_param param = { .sched_priority = 0 };
721
722         sched_setscheduler(current, SCHED_NORMAL, &param);
723 }
724
725 #endif /* PREEMPT_RT_FULL */
726 /*
727  * Enter an interrupt context.
728  */
729 void irq_enter(void)
730 {
731         rcu_irq_enter();
732         if (is_idle_task(current) && !in_interrupt()) {
733                 /*
734                  * Prevent raise_softirq from needlessly waking up ksoftirqd
735                  * here, as softirq will be serviced on return from interrupt.
736                  */
737                 local_bh_disable_nort();
738                 tick_irq_enter();
739                 _local_bh_enable_nort();
740         }
741
742         __irq_enter();
743 }
744
745 static inline void invoke_softirq(void)
746 {
747 #ifndef CONFIG_PREEMPT_RT_FULL
748         if (!force_irqthreads) {
749 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
750                 /*
751                  * We can safely execute softirq on the current stack if
752                  * it is the irq stack, because it should be near empty
753                  * at this stage.
754                  */
755                 __do_softirq();
756 #else
757                 /*
758                  * Otherwise, irq_exit() is called on the task stack that can
759                  * be potentially deep already. So call softirq in its own stack
760                  * to prevent any overrun.
761                  */
762                 do_softirq_own_stack();
763 #endif
764         } else {
765                 wakeup_softirqd();
766         }
767 #else /* PREEMPT_RT_FULL */
768         unsigned long flags;
769
770         local_irq_save(flags);
771         if (__this_cpu_read(ksoftirqd) &&
772                         __this_cpu_read(ksoftirqd)->softirqs_raised)
773                 wakeup_softirqd();
774         local_irq_restore(flags);
775 #endif
776 }
777
778 static inline void tick_irq_exit(void)
779 {
780 #ifdef CONFIG_NO_HZ_COMMON
781         int cpu = smp_processor_id();
782
783         /* Make sure that timer wheel updates are propagated */
784         if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
785                 if (!in_interrupt())
786                         tick_nohz_irq_exit();
787         }
788 #endif
789 }
790
791 /*
792  * Exit an interrupt context. Process softirqs if needed and possible:
793  */
794 void irq_exit(void)
795 {
796 #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
797         local_irq_disable();
798 #else
799         WARN_ON_ONCE(!irqs_disabled());
800 #endif
801
802         account_irq_exit_time(current);
803         preempt_count_sub(HARDIRQ_OFFSET);
804         if (!in_interrupt() && local_softirq_pending())
805                 invoke_softirq();
806
807         tick_irq_exit();
808         rcu_irq_exit();
809         trace_hardirq_exit(); /* must be last! */
810 }
811
812 void raise_softirq(unsigned int nr)
813 {
814         unsigned long flags;
815
816         local_irq_save(flags);
817         raise_softirq_irqoff(nr);
818         local_irq_restore(flags);
819 }
820
821 void open_softirq(int nr, void (*action)(struct softirq_action *))
822 {
823         softirq_vec[nr].action = action;
824 }
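/*
 * Editor's note, a hedged sketch of the usual registration pattern (not
 * part of the original file): softirq_init() below wires up the tasklet
 * vectors this way, and net/core/dev.c does much the same for the
 * networking vectors, roughly:
 *
 *	open_softirq(NET_TX_SOFTIRQ, net_tx_action);	once, at init time
 *	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
 *
 *	raise_softirq(NET_TX_SOFTIRQ);		later, to mark it pending
 */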
825
826 /*
827  * Tasklets
828  */
829 struct tasklet_head {
830         struct tasklet_struct *head;
831         struct tasklet_struct **tail;
832 };
833
834 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
835 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
836
837 static inline void
838 __tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
839 {
840         if (tasklet_trylock(t)) {
841 again:
842                 /* We may have been preempted before tasklet_trylock
843                  * and __tasklet_action may have already run.
844                  * So double check the sched bit while the tasklet
845                  * is locked before adding it to the list.
846                  */
847                 if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
848                         t->next = NULL;
849                         *head->tail = t;
850                         head->tail = &(t->next);
851                         raise_softirq_irqoff(nr);
852                         tasklet_unlock(t);
853                 } else {
854                         /* This is subtle. If we hit the corner case above,
855                          * it is possible that we get preempted right here,
856                          * and another task has successfully called
857                          * tasklet_schedule(), then this function, and
858                          * failed on the trylock. Thus we must be sure,
859                          * before releasing the tasklet lock, that the
860                          * SCHED bit is clear. Otherwise the tasklet
861                          * may get its SCHED bit set, but not be added
862                          * to the list.
863                          */
864                         if (!tasklet_tryunlock(t))
865                                 goto again;
866                 }
867         }
868 }
869
870 void __tasklet_schedule(struct tasklet_struct *t)
871 {
872         unsigned long flags;
873
874         local_irq_save(flags);
875         __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
876         local_irq_restore(flags);
877 }
878 EXPORT_SYMBOL(__tasklet_schedule);
879
880 void __tasklet_hi_schedule(struct tasklet_struct *t)
881 {
882         unsigned long flags;
883
884         local_irq_save(flags);
885         __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
886         local_irq_restore(flags);
887 }
888 EXPORT_SYMBOL(__tasklet_hi_schedule);
889
890 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
891 {
892         BUG_ON(!irqs_disabled());
893
894         __tasklet_hi_schedule(t);
895 }
896 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
897
898 void  tasklet_enable(struct tasklet_struct *t)
899 {
900         if (!atomic_dec_and_test(&t->count))
901                 return;
902         if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
903                 tasklet_schedule(t);
904 }
905 EXPORT_SYMBOL(tasklet_enable);
906
907 static void __tasklet_action(struct softirq_action *a,
908                              struct tasklet_struct *list)
909 {
910         int loops = 1000000;
911
912         while (list) {
913                 struct tasklet_struct *t = list;
914
915                 list = list->next;
916
917                 /*
918                  * Should always succeed - after a tasklet got on the
919                  * list (after getting the SCHED bit set from 0 to 1),
920                  * nothing but the tasklet softirq it got queued to can
921                  * lock it:
922                  */
923                 if (!tasklet_trylock(t)) {
924                         WARN_ON(1);
925                         continue;
926                 }
927
928                 t->next = NULL;
929
930                 /*
931                  * If we cannot handle the tasklet because it's disabled,
932                  * mark it as pending. tasklet_enable() will later
933                  * re-schedule the tasklet.
934                  */
935                 if (unlikely(atomic_read(&t->count))) {
936 out_disabled:
937                         /* implicit unlock: */
938                         wmb();
939                         t->state = TASKLET_STATEF_PENDING;
940                         continue;
941                 }
942
943                 /*
944                  * From this point on the tasklet might be rescheduled
945                  * on another CPU, but it can only be added to another
946                  * CPU's tasklet list if we unlock the tasklet (which we
947                  * don't do yet).
948                  */
949                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
950                         WARN_ON(1);
951
952 again:
953                 t->func(t->data);
954
955                 /*
956                  * Try to unlock the tasklet. We must use cmpxchg, because
957                  * another CPU might have scheduled or disabled the tasklet.
958                  * We only allow the STATE_RUN -> 0 transition here.
959                  */
960                 while (!tasklet_tryunlock(t)) {
961                         /*
962                          * If it got disabled meanwhile, bail out:
963                          */
964                         if (atomic_read(&t->count))
965                                 goto out_disabled;
966                         /*
967                          * If it got scheduled meanwhile, re-execute
968                          * the tasklet function:
969                          */
970                         if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
971                                 goto again;
972                         if (!--loops) {
973                                 printk("hm, tasklet state: %08lx\n", t->state);
974                                 WARN_ON(1);
975                                 tasklet_unlock(t);
976                                 break;
977                         }
978                 }
979         }
980 }
981
982 static void tasklet_action(struct softirq_action *a)
983 {
984         struct tasklet_struct *list;
985
986         local_irq_disable();
987
988         list = __this_cpu_read(tasklet_vec.head);
989         __this_cpu_write(tasklet_vec.head, NULL);
990         __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
991
992         local_irq_enable();
993
994         __tasklet_action(a, list);
995 }
996
997 static void tasklet_hi_action(struct softirq_action *a)
998 {
999         struct tasklet_struct *list;
1000
1001         local_irq_disable();
1002
1003         list = __this_cpu_read(tasklet_hi_vec.head);
1004         __this_cpu_write(tasklet_hi_vec.head, NULL);
1005         __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
1006
1007         local_irq_enable();
1008
1009         __tasklet_action(a, list);
1010 }
1011
1012 void tasklet_init(struct tasklet_struct *t,
1013                   void (*func)(unsigned long), unsigned long data)
1014 {
1015         t->next = NULL;
1016         t->state = 0;
1017         atomic_set(&t->count, 0);
1018         t->func = func;
1019         t->data = data;
1020 }
1021 EXPORT_SYMBOL(tasklet_init);
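/*
 * Editor's note, a minimal usage sketch (not part of the original file;
 * all my_* names are hypothetical):
 *
 *	static void my_drv_bh(unsigned long data)
 *	{
 *		struct my_drv *drv = (struct my_drv *)data;
 *		... deferred work, runs in softirq context ...
 *	}
 *
 *	tasklet_init(&drv->bh, my_drv_bh, (unsigned long)drv);
 *	tasklet_schedule(&drv->bh);	typically from the hard irq handler
 */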
1022
1023 void tasklet_kill(struct tasklet_struct *t)
1024 {
1025         if (in_interrupt())
1026                 pr_notice("Attempt to kill tasklet from interrupt\n");
1027
1028         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
1029                 do {
1030                         msleep(1);
1031                 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
1032         }
1033         tasklet_unlock_wait(t);
1034         clear_bit(TASKLET_STATE_SCHED, &t->state);
1035 }
1036 EXPORT_SYMBOL(tasklet_kill);
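/*
 * Editor's note (an assumption about typical usage, not part of the
 * original file): tasklet_kill() only waits for the tasklet, it does not
 * prevent it from being scheduled again, so teardown paths normally
 * quiesce the source first, e.g. in a hypothetical driver:
 *
 *	free_irq(drv->irq, drv);	no further tasklet_schedule() calls
 *	tasklet_kill(&drv->bh);		then wait out a running instance
 */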
1037
1038 /*
1039  * tasklet_hrtimer
1040  */
1041
1042 /*
1043  * The trampoline is called when the hrtimer expires. It schedules a tasklet
1044  * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
1045  * hrtimer callback, but from softirq context.
1046  */
1047 static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
1048 {
1049         struct tasklet_hrtimer *ttimer =
1050                 container_of(timer, struct tasklet_hrtimer, timer);
1051
1052         tasklet_hi_schedule(&ttimer->tasklet);
1053         return HRTIMER_NORESTART;
1054 }
1055
1056 /*
1057  * Helper function which calls the hrtimer callback from
1058  * tasklet/softirq context
1059  */
1060 static void __tasklet_hrtimer_trampoline(unsigned long data)
1061 {
1062         struct tasklet_hrtimer *ttimer = (void *)data;
1063         enum hrtimer_restart restart;
1064
1065         restart = ttimer->function(&ttimer->timer);
1066         if (restart != HRTIMER_NORESTART)
1067                 hrtimer_restart(&ttimer->timer);
1068 }
1069
1070 /**
1071  * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
1072  * @ttimer:      tasklet_hrtimer which is initialized
1073  * @function:    hrtimer callback function which gets called from softirq context
1074  * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
1075  * @mode:        hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
1076  */
1077 void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
1078                           enum hrtimer_restart (*function)(struct hrtimer *),
1079                           clockid_t which_clock, enum hrtimer_mode mode)
1080 {
1081         hrtimer_init(&ttimer->timer, which_clock, mode);
1082         ttimer->timer.function = __hrtimer_tasklet_trampoline;
1083         tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
1084                      (unsigned long)ttimer);
1085         ttimer->function = function;
1086 }
1087 EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
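/*
 * Editor's note, a hedged usage sketch (not part of the original file;
 * the my_* names are hypothetical). The callback runs from HI_SOFTIRQ
 * context via the tasklet set up above:
 *
 *	static enum hrtimer_restart my_cb(struct hrtimer *t)
 *	{
 *		... softirq-context work ...
 *		return HRTIMER_NORESTART;
 *	}
 *
 *	tasklet_hrtimer_init(&my_tt, my_cb, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	tasklet_hrtimer_start(&my_tt, ms_to_ktime(10), HRTIMER_MODE_REL);
 */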
1088
1089 void __init softirq_init(void)
1090 {
1091         int cpu;
1092
1093         for_each_possible_cpu(cpu) {
1094                 per_cpu(tasklet_vec, cpu).tail =
1095                         &per_cpu(tasklet_vec, cpu).head;
1096                 per_cpu(tasklet_hi_vec, cpu).tail =
1097                         &per_cpu(tasklet_hi_vec, cpu).head;
1098         }
1099
1100         open_softirq(TASKLET_SOFTIRQ, tasklet_action);
1101         open_softirq(HI_SOFTIRQ, tasklet_hi_action);
1102 }
1103
1104 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
1105 void tasklet_unlock_wait(struct tasklet_struct *t)
1106 {
1107         while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
1108                 /*
1109                  * Hack for now to avoid this busy-loop:
1110                  */
1111 #ifdef CONFIG_PREEMPT_RT_FULL
1112                 msleep(1);
1113 #else
1114                 barrier();
1115 #endif
1116         }
1117 }
1118 EXPORT_SYMBOL(tasklet_unlock_wait);
1119 #endif
1120
1121 static int ksoftirqd_should_run(unsigned int cpu)
1122 {
1123         return ksoftirqd_softirq_pending();
1124 }
1125
1126 #ifdef CONFIG_HOTPLUG_CPU
1127 /*
1128  * tasklet_kill_immediate is called to remove a tasklet which may already be
1129  * scheduled for execution on @cpu.
1130  *
1131  * Unlike tasklet_kill, this function removes the tasklet
1132  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
1133  *
1134  * When this function is called, @cpu must be in the CPU_DEAD state.
1135  */
1136 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
1137 {
1138         struct tasklet_struct **i;
1139
1140         BUG_ON(cpu_online(cpu));
1141         BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
1142
1143         if (!test_bit(TASKLET_STATE_SCHED, &t->state))
1144                 return;
1145
1146         /* CPU is dead, so no lock needed. */
1147         for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
1148                 if (*i == t) {
1149                         *i = t->next;
1150                         /* If this was the tail element, move the tail ptr */
1151                         if (*i == NULL)
1152                                 per_cpu(tasklet_vec, cpu).tail = i;
1153                         return;
1154                 }
1155         }
1156         BUG();
1157 }
1158
1159 static void takeover_tasklets(unsigned int cpu)
1160 {
1161         /* CPU is dead, so no lock needed. */
1162         local_irq_disable();
1163
1164         /* Find end, append list for that CPU. */
1165         if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
1166                 *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
1167                 this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
1168                 per_cpu(tasklet_vec, cpu).head = NULL;
1169                 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
1170         }
1171         raise_softirq_irqoff(TASKLET_SOFTIRQ);
1172
1173         if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
1174                 *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
1175                 __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
1176                 per_cpu(tasklet_hi_vec, cpu).head = NULL;
1177                 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
1178         }
1179         raise_softirq_irqoff(HI_SOFTIRQ);
1180
1181         local_irq_enable();
1182 }
1183 #endif /* CONFIG_HOTPLUG_CPU */
1184
1185 static int cpu_callback(struct notifier_block *nfb, unsigned long action,
1186                         void *hcpu)
1187 {
1188         switch (action) {
1189 #ifdef CONFIG_HOTPLUG_CPU
1190         case CPU_DEAD:
1191         case CPU_DEAD_FROZEN:
1192                 takeover_tasklets((unsigned long)hcpu);
1193                 break;
1194 #endif /* CONFIG_HOTPLUG_CPU */
1195         }
1196         return NOTIFY_OK;
1197 }
1198
1199 static struct notifier_block cpu_nfb = {
1200         .notifier_call = cpu_callback
1201 };
1202
1203 static struct smp_hotplug_thread softirq_threads = {
1204         .store                  = &ksoftirqd,
1205         .setup                  = ksoftirqd_set_sched_params,
1206         .cleanup                = ksoftirqd_clr_sched_params,
1207         .thread_should_run      = ksoftirqd_should_run,
1208         .thread_fn              = run_ksoftirqd,
1209         .thread_comm            = "ksoftirqd/%u",
1210 };
1211
1212 static __init int spawn_ksoftirqd(void)
1213 {
1214         register_cpu_notifier(&cpu_nfb);
1215
1216         BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
1217
1218         return 0;
1219 }
1220 early_initcall(spawn_ksoftirqd);
1221
1222 /*
1223  * [ These __weak aliases are kept in a separate compilation unit, so that
1224  *   GCC does not inline them incorrectly. ]
1225  */
1226
1227 int __init __weak early_irq_init(void)
1228 {
1229         return 0;
1230 }
1231
1232 int __init __weak arch_probe_nr_irqs(void)
1233 {
1234         return NR_IRQS_LEGACY;
1235 }
1236
1237 int __init __weak arch_early_irq_init(void)
1238 {
1239         return 0;
1240 }
1241
1242 unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
1243 {
1244         return from;
1245 }