These changes are the raw update of the kernel sources to linux-4.4.6-rt14.
[kvmfornfv.git] / kernel / kernel / softirq.c
1 /*
2  *      linux/kernel/softirq.c
3  *
4  *      Copyright (C) 1992 Linus Torvalds
5  *
6  *      Distribute under GPLv2.
7  *
8  *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  */
10
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13 #include <linux/export.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/delay.h>
25 #include <linux/ftrace.h>
26 #include <linux/smp.h>
27 #include <linux/smpboot.h>
28 #include <linux/tick.h>
29 #include <linux/locallock.h>
30 #include <linux/irq.h>
31
32 #define CREATE_TRACE_POINTS
33 #include <trace/events/irq.h>
34
35 /*
36    - No shared variables, all the data are CPU local.
37    - If a softirq needs serialization, let it serialize itself
38      by its own spinlocks.
39    - Even if softirq is serialized, only local cpu is marked for
40      execution. Hence, we get something sort of weak cpu binding.
41      Though it is still not clear whether it will result in better
42      locality or not.
43
44    Examples:
45    - NET RX softirq. It is multithreaded and does not require
46      any global serialization.
47    - NET TX softirq. It kicks software netdevice queues, hence
48      it is logically serialized per device, but this serialization
49      is invisible to common code.
50    - Tasklets: serialized wrt itself.
51  */
52
53 #ifndef __ARCH_IRQ_STAT
54 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
55 EXPORT_SYMBOL(irq_stat);
56 #endif
57
58 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
59
60 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
61 #ifdef CONFIG_PREEMPT_RT_FULL
62 #define TIMER_SOFTIRQS  ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
63 DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
64 #endif
65
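66 /*
66  * Human-readable names for the softirq vectors below. These are the
66  * strings that show up per CPU in /proc/softirqs, and they match the
66  * names printed by the irq:softirq_* trace events.
66  */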
66 const char * const softirq_to_name[NR_SOFTIRQS] = {
67         "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
68         "TASKLET", "SCHED", "HRTIMER", "RCU"
69 };
70
71 #ifdef CONFIG_NO_HZ_COMMON
72 # ifdef CONFIG_PREEMPT_RT_FULL
73
74 struct softirq_runner {
75         struct task_struct *runner[NR_SOFTIRQS];
76 };
77
78 static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
79
80 static inline void softirq_set_runner(unsigned int sirq)
81 {
82         struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
83
84         sr->runner[sirq] = current;
85 }
86
87 static inline void softirq_clr_runner(unsigned int sirq)
88 {
89         struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
90
91         sr->runner[sirq] = NULL;
92 }
93
94 /*
95  * On preempt-rt a softirq running context might be blocked on a
96  * lock. There might be no other runnable task on this CPU because the
97  * lock owner runs on some other CPU. So we have to go into idle with
98  * the pending bit set. Therefore we need to check this, otherwise we
99  * warn about false positives which confuses users and defeats the
100  * whole purpose of this test.
101  *
102  * This code is called with interrupts disabled.
103  */
104 void softirq_check_pending_idle(void)
105 {
106         static int rate_limit;
107         struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
108         u32 warnpending;
109         int i;
110
111         if (rate_limit >= 10)
112                 return;
113
114         warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
115         for (i = 0; i < NR_SOFTIRQS; i++) {
116                 struct task_struct *tsk = sr->runner[i];
117
118                 /*
119                  * The wakeup code in rtmutex.c wakes up the task
120                  * _before_ it sets pi_blocked_on to NULL under
121                  * tsk->pi_lock. So we need to check for both: state
122                  * and pi_blocked_on.
123                  */
124                 if (tsk) {
125                         raw_spin_lock(&tsk->pi_lock);
126                         if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
127                                 /* Clear all bits pending in that task */
128                                 warnpending &= ~(tsk->softirqs_raised);
129                                 warnpending &= ~(1 << i);
130                         }
131                         raw_spin_unlock(&tsk->pi_lock);
132                 }
133         }
134
135         if (warnpending) {
136                 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
137                        warnpending);
138                 rate_limit++;
139         }
140 }
141 # else
142 /*
143  * On !PREEMPT_RT we just printk rate limited:
144  */
145 void softirq_check_pending_idle(void)
146 {
147         static int rate_limit;
148
149         if (rate_limit < 10 &&
150                         (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
151                 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
152                        local_softirq_pending());
153                 rate_limit++;
154         }
155 }
156 # endif
157
158 #else /* !CONFIG_NO_HZ_COMMON */
159 static inline void softirq_set_runner(unsigned int sirq) { }
160 static inline void softirq_clr_runner(unsigned int sirq) { }
161 #endif
162
163 /*
164  * We cannot loop indefinitely here to avoid userspace starvation,
165  * but we also don't want to introduce a worst case 1/HZ latency
166  * to the pending events, so let the scheduler balance
167  * the softirq load for us.
168  */
169 static void wakeup_softirqd(void)
170 {
171         /* Interrupts are disabled: no need to stop preemption */
172         struct task_struct *tsk = __this_cpu_read(ksoftirqd);
173
174         if (tsk && tsk->state != TASK_RUNNING)
175                 wake_up_process(tsk);
176 }
177
178 #ifdef CONFIG_PREEMPT_RT_FULL
179 static void wakeup_timer_softirqd(void)
180 {
181         /* Interrupts are disabled: no need to stop preemption */
182         struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
183
184         if (tsk && tsk->state != TASK_RUNNING)
185                 wake_up_process(tsk);
186 }
187 #endif
188
189 static void handle_softirq(unsigned int vec_nr)
190 {
191         struct softirq_action *h = softirq_vec + vec_nr;
192         int prev_count;
193
194         prev_count = preempt_count();
195
196         kstat_incr_softirqs_this_cpu(vec_nr);
197
198         trace_softirq_entry(vec_nr);
199         h->action(h);
200         trace_softirq_exit(vec_nr);
201         if (unlikely(prev_count != preempt_count())) {
202                 pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
203                        vec_nr, softirq_to_name[vec_nr], h->action,
204                        prev_count, preempt_count());
205                 preempt_count_set(prev_count);
206         }
207 }
208
209 #ifndef CONFIG_PREEMPT_RT_FULL
210 static inline int ksoftirqd_softirq_pending(void)
211 {
212         return local_softirq_pending();
213 }
214
215 static void handle_pending_softirqs(u32 pending)
216 {
217         struct softirq_action *h = softirq_vec;
218         int softirq_bit;
219
220         local_irq_enable();
221
222         h = softirq_vec;
223
224         while ((softirq_bit = ffs(pending))) {
225                 unsigned int vec_nr;
226
227                 h += softirq_bit - 1;
228                 vec_nr = h - softirq_vec;
229                 handle_softirq(vec_nr);
230
231                 h++;
232                 pending >>= softirq_bit;
233         }
234
235         rcu_bh_qs();
236         local_irq_disable();
237 }
238
239 static void run_ksoftirqd(unsigned int cpu)
240 {
241         local_irq_disable();
242         if (ksoftirqd_softirq_pending()) {
243                 __do_softirq();
244                 local_irq_enable();
245                 cond_resched_rcu_qs();
246                 return;
247         }
248         local_irq_enable();
249 }
250
251 /*
252  * preempt_count and SOFTIRQ_OFFSET usage:
253  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
254  *   softirq processing.
255  * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
256  *   on local_bh_disable or local_bh_enable.
257  * This lets us distinguish between whether we are currently processing
258  * softirq and whether we just have bh disabled.
259  */
260
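261 /*
261  * Worked example for the (!PREEMPT_RT) layout in <linux/preempt.h>,
261  * illustrative values only:
261  *
261  *   SOFTIRQ_OFFSET         == 0x100   (added while processing softirqs)
261  *   SOFTIRQ_DISABLE_OFFSET == 0x200   (added by local_bh_disable())
261  *
261  * So softirq_count() == 0x200 means "bh disabled but not processing",
261  * while a set SOFTIRQ_OFFSET bit (0x100) means we are currently inside
261  * softirq processing, which is what in_serving_softirq() tests.
261  */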
261 /*
262  * This one is for softirq.c-internal use,
263  * where hardirqs are disabled legitimately:
264  */
265 #ifdef CONFIG_TRACE_IRQFLAGS
266 void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
267 {
268         unsigned long flags;
269
270         WARN_ON_ONCE(in_irq());
271
272         raw_local_irq_save(flags);
273         /*
274          * The preempt tracer hooks into preempt_count_add and will break
275          * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
276          * is set and before current->softirq_enabled is cleared.
277          * We must manually increment preempt_count here and manually
278          * call the trace_preempt_off later.
279          */
280         __preempt_count_add(cnt);
281         /*
282          * Were softirqs turned off above:
283          */
284         if (softirq_count() == (cnt & SOFTIRQ_MASK))
285                 trace_softirqs_off(ip);
286         raw_local_irq_restore(flags);
287
288         if (preempt_count() == cnt) {
289 #ifdef CONFIG_DEBUG_PREEMPT
290                 current->preempt_disable_ip = get_lock_parent_ip();
291 #endif
292                 trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
293         }
294 }
295 EXPORT_SYMBOL(__local_bh_disable_ip);
296 #endif /* CONFIG_TRACE_IRQFLAGS */
297
298 static void __local_bh_enable(unsigned int cnt)
299 {
300         WARN_ON_ONCE(!irqs_disabled());
301
302         if (softirq_count() == (cnt & SOFTIRQ_MASK))
303                 trace_softirqs_on(_RET_IP_);
304         preempt_count_sub(cnt);
305 }
306
307 /*
308  * Special-case - softirqs can safely be enabled in
309  * cond_resched_softirq(), or by __do_softirq(),
310  * without processing still-pending softirqs:
311  */
312 void _local_bh_enable(void)
313 {
314         WARN_ON_ONCE(in_irq());
315         __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
316 }
317 EXPORT_SYMBOL(_local_bh_enable);
318
319 void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
320 {
321         WARN_ON_ONCE(in_irq() || irqs_disabled());
322 #ifdef CONFIG_TRACE_IRQFLAGS
323         local_irq_disable();
324 #endif
325         /*
326          * Are softirqs going to be turned on now:
327          */
328         if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
329                 trace_softirqs_on(ip);
330         /*
331          * Keep preemption disabled until we are done with
332          * softirq processing:
333          */
334         preempt_count_sub(cnt - 1);
335
336         if (unlikely(!in_interrupt() && local_softirq_pending())) {
337                 /*
338                  * Run softirq if any pending. And do it in its own stack
339                  * as we may be calling this deep in a task call stack already.
340                  */
341                 do_softirq();
342         }
343
344         preempt_count_dec();
345 #ifdef CONFIG_TRACE_IRQFLAGS
346         local_irq_enable();
347 #endif
348         preempt_check_resched();
349 }
350 EXPORT_SYMBOL(__local_bh_enable_ip);
351
352 /*
353  * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
354  * but break the loop if need_resched() is set or after 2 ms.
355  * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
356  * certain cases, such as stop_machine(), jiffies may cease to
357  * increment and so we need the MAX_SOFTIRQ_RESTART limit as
358  * well to make sure we eventually return from this method.
359  *
360  * These limits have been established via experimentation.
361  * The two things to balance are latency and fairness -
362  * we want to handle softirqs as soon as possible, but they
363  * should not be able to lock up the box.
364  */
365 #define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
366 #define MAX_SOFTIRQ_RESTART 10
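367 /*
367  * For example (depends on CONFIG_HZ): with HZ=1000 the 2ms budget is
367  * 2 jiffies, while with HZ=250 or HZ=100 msecs_to_jiffies(2) rounds up
367  * to a single jiffy, i.e. "until the next tick". MAX_SOFTIRQ_RESTART
367  * is what still bounds the loop if jiffies stops advancing.
367  */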
367
368 #ifdef CONFIG_TRACE_IRQFLAGS
369 /*
370  * When we run softirqs from irq_exit() and thus on the hardirq stack we need
371  * to keep the lockdep irq context tracking as tight as possible in order to
372  * not mis-qualify lock contexts and miss possible deadlocks.
373  */
374
375 static inline bool lockdep_softirq_start(void)
376 {
377         bool in_hardirq = false;
378
379         if (trace_hardirq_context(current)) {
380                 in_hardirq = true;
381                 trace_hardirq_exit();
382         }
383
384         lockdep_softirq_enter();
385
386         return in_hardirq;
387 }
388
389 static inline void lockdep_softirq_end(bool in_hardirq)
390 {
391         lockdep_softirq_exit();
392
393         if (in_hardirq)
394                 trace_hardirq_enter();
395 }
396 #else
397 static inline bool lockdep_softirq_start(void) { return false; }
398 static inline void lockdep_softirq_end(bool in_hardirq) { }
399 #endif
400
401 asmlinkage __visible void __do_softirq(void)
402 {
403         unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
404         unsigned long old_flags = current->flags;
405         int max_restart = MAX_SOFTIRQ_RESTART;
406         bool in_hardirq;
407         __u32 pending;
408
409         /*
410          * Mask out PF_MEMALLOC, as the current task context is borrowed for the
411          * softirq. A softirq handler such as network RX might set PF_MEMALLOC
412          * again if the socket is related to swap.
413          */
414         current->flags &= ~PF_MEMALLOC;
415
416         pending = local_softirq_pending();
417         account_irq_enter_time(current);
418
419         __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
420         in_hardirq = lockdep_softirq_start();
421
422 restart:
423         /* Reset the pending bitmask before enabling irqs */
424         set_softirq_pending(0);
425
426         handle_pending_softirqs(pending);
427
428         pending = local_softirq_pending();
429         if (pending) {
430                 if (time_before(jiffies, end) && !need_resched() &&
431                     --max_restart)
432                         goto restart;
433
434                 wakeup_softirqd();
435         }
436
437         lockdep_softirq_end(in_hardirq);
438         account_irq_exit_time(current);
439         __local_bh_enable(SOFTIRQ_OFFSET);
440         WARN_ON_ONCE(in_interrupt());
441         tsk_restore_flags(current, old_flags, PF_MEMALLOC);
442 }
443
444 asmlinkage __visible void do_softirq(void)
445 {
446         __u32 pending;
447         unsigned long flags;
448
449         if (in_interrupt())
450                 return;
451
452         local_irq_save(flags);
453
454         pending = local_softirq_pending();
455
456         if (pending)
457                 do_softirq_own_stack();
458
459         local_irq_restore(flags);
460 }
461
462 /*
463  * This function must run with irqs disabled!
464  */
465 void raise_softirq_irqoff(unsigned int nr)
466 {
467         __raise_softirq_irqoff(nr);
468
469         /*
470          * If we're in an interrupt or softirq, we're done
471          * (this also catches softirq-disabled code). We will
472          * actually run the softirq once we return from
473          * the irq or softirq.
474          *
475          * Otherwise we wake up ksoftirqd to make sure we
476          * schedule the softirq soon.
477          */
478         if (!in_interrupt())
479                 wakeup_softirqd();
480 }
481
482 void __raise_softirq_irqoff(unsigned int nr)
483 {
484         trace_softirq_raise(nr);
485         or_softirq_pending(1UL << nr);
486 }
487
488 static inline void local_bh_disable_nort(void) { local_bh_disable(); }
489 static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
490 static void ksoftirqd_set_sched_params(unsigned int cpu) { }
491
492 #else /* !PREEMPT_RT_FULL */
493
494 /*
495  * On RT we serialize softirq execution with a cpu local lock per softirq
496  */
497 static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
498
499 void __init softirq_early_init(void)
500 {
501         int i;
502
503         for (i = 0; i < NR_SOFTIRQS; i++)
504                 local_irq_lock_init(local_softirq_locks[i]);
505 }
506
507 static void lock_softirq(int which)
508 {
509         local_lock(local_softirq_locks[which]);
510 }
511
512 static void unlock_softirq(int which)
513 {
514         local_unlock(local_softirq_locks[which]);
515 }
516
517 static void do_single_softirq(int which)
518 {
519         unsigned long old_flags = current->flags;
520
521         current->flags &= ~PF_MEMALLOC;
522         vtime_account_irq_enter(current);
523         current->flags |= PF_IN_SOFTIRQ;
524         lockdep_softirq_enter();
525         local_irq_enable();
526         handle_softirq(which);
527         local_irq_disable();
528         lockdep_softirq_exit();
529         current->flags &= ~PF_IN_SOFTIRQ;
530         vtime_account_irq_enter(current);
531         tsk_restore_flags(current, old_flags, PF_MEMALLOC);
532 }
533
534 /*
535  * Called with interrupts disabled. Process softirqs which were raised
536  * in current context (or on behalf of ksoftirqd).
537  */
538 static void do_current_softirqs(void)
539 {
540         while (current->softirqs_raised) {
541                 int i = __ffs(current->softirqs_raised);
542                 unsigned int pending, mask = (1U << i);
543
544                 current->softirqs_raised &= ~mask;
545                 local_irq_enable();
546
547                 /*
548                  * If the lock is contended, we boost the owner to
549                  * process the softirq or leave the critical section
550                  * now.
551                  */
552                 lock_softirq(i);
553                 local_irq_disable();
554                 softirq_set_runner(i);
555                 /*
556                  * Check with the local_softirq_pending() bits,
557                  * whether we need to process this still or if someone
558                  * else took care of it.
559                  */
560                 pending = local_softirq_pending();
561                 if (pending & mask) {
562                         set_softirq_pending(pending & ~mask);
563                         do_single_softirq(i);
564                 }
565                 softirq_clr_runner(i);
566                 WARN_ON(current->softirq_nestcnt != 1);
567                 local_irq_enable();
568                 unlock_softirq(i);
569                 local_irq_disable();
570         }
571 }
572
573 void __local_bh_disable(void)
574 {
575         if (++current->softirq_nestcnt == 1)
576                 migrate_disable();
577 }
578 EXPORT_SYMBOL(__local_bh_disable);
579
580 void __local_bh_enable(void)
581 {
582         if (WARN_ON(current->softirq_nestcnt == 0))
583                 return;
584
585         local_irq_disable();
586         if (current->softirq_nestcnt == 1 && current->softirqs_raised)
587                 do_current_softirqs();
588         local_irq_enable();
589
590         if (--current->softirq_nestcnt == 0)
591                 migrate_enable();
592 }
593 EXPORT_SYMBOL(__local_bh_enable);
594
595 void _local_bh_enable(void)
596 {
597         if (WARN_ON(current->softirq_nestcnt == 0))
598                 return;
599         if (--current->softirq_nestcnt == 0)
600                 migrate_enable();
601 }
602 EXPORT_SYMBOL(_local_bh_enable);
603
604 int in_serving_softirq(void)
605 {
606         return current->flags & PF_IN_SOFTIRQ;
607 }
608 EXPORT_SYMBOL(in_serving_softirq);
609
610 /* Called with preemption disabled */
611 static void run_ksoftirqd(unsigned int cpu)
612 {
613         local_irq_disable();
614         current->softirq_nestcnt++;
615
616         do_current_softirqs();
617         current->softirq_nestcnt--;
618         local_irq_enable();
619         cond_resched_rcu_qs();
620 }
621
622 /*
623  * Called from netif_rx_ni(). Preemption enabled, but migration
624  * disabled. So the cpu can't go away under us.
625  */
626 void thread_do_softirq(void)
627 {
628         if (!in_serving_softirq() && current->softirqs_raised) {
629                 current->softirq_nestcnt++;
630                 do_current_softirqs();
631                 current->softirq_nestcnt--;
632         }
633 }
634
635 static void do_raise_softirq_irqoff(unsigned int nr)
636 {
637         unsigned int mask;
638
639         mask = 1UL << nr;
640
641         trace_softirq_raise(nr);
642         or_softirq_pending(mask);
643
644         /*
645          * If we are not in a hard interrupt and inside a bh disabled
646          * region, we simply raise the flag on current. local_bh_enable()
647          * will make sure that the softirq is executed. Otherwise we
648          * delegate it to ksoftirqd.
649          */
650         if (!in_irq() && current->softirq_nestcnt)
651                 current->softirqs_raised |= mask;
652         else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
653                 return;
654
655         if (mask & TIMER_SOFTIRQS)
656                 __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
657         else
658                 __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
659 }
660
661 static void wakeup_proper_softirq(unsigned int nr)
662 {
663         if ((1UL << nr) & TIMER_SOFTIRQS)
664                 wakeup_timer_softirqd();
665         else
666                 wakeup_softirqd();
667 }
668
669
670 void __raise_softirq_irqoff(unsigned int nr)
671 {
672         do_raise_softirq_irqoff(nr);
673         if (!in_irq() && !current->softirq_nestcnt)
674                 wakeup_proper_softirq(nr);
675 }
676
677 /*
678  * Same as __raise_softirq_irqoff() but will process them in ksoftirqd
679  */
680 void __raise_softirq_irqoff_ksoft(unsigned int nr)
681 {
682         unsigned int mask;
683
684         if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) ||
685                          !__this_cpu_read(ktimer_softirqd)))
686                 return;
687         mask = 1UL << nr;
688
689         trace_softirq_raise(nr);
690         or_softirq_pending(mask);
691         if (mask & TIMER_SOFTIRQS)
692                 __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
693         else
694                 __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
695         wakeup_proper_softirq(nr);
696 }
697
698 /*
699  * This function must run with irqs disabled!
700  */
701 void raise_softirq_irqoff(unsigned int nr)
702 {
703         do_raise_softirq_irqoff(nr);
704
705         /*
706          * If we're in a hard interrupt we let the irq return code deal
707          * with the wakeup of ksoftirqd.
708          */
709         if (in_irq())
710                 return;
711         /*
712          * If we are in thread context but outside of a bh disabled
713          * region, we need to wake ksoftirqd as well.
714          *
715          * CHECKME: Some of the places which do that could be wrapped
716          * into local_bh_disable/enable pairs. Though it's unclear
717          * whether this is worth the effort. To find those places just
718          * raise a WARN() if the condition is met.
719          */
720         if (!current->softirq_nestcnt)
721                 wakeup_proper_softirq(nr);
722 }
723
724 static inline int ksoftirqd_softirq_pending(void)
725 {
726         return current->softirqs_raised;
727 }
728
729 static inline void local_bh_disable_nort(void) { }
730 static inline void _local_bh_enable_nort(void) { }
731
732 static inline void ksoftirqd_set_sched_params(unsigned int cpu)
733 {
734         /* Take over all but timer pending softirqs when starting */
735         local_irq_disable();
736         current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
737         local_irq_enable();
738 }
739
740 static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
741 {
742         struct sched_param param = { .sched_priority = 1 };
743
744         sched_setscheduler(current, SCHED_FIFO, &param);
745
746         /* Take over timer pending softirqs when starting */
747         local_irq_disable();
748         current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
749         local_irq_enable();
750 }
751
752 static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
753                                                     bool online)
754 {
755         struct sched_param param = { .sched_priority = 0 };
756
757         sched_setscheduler(current, SCHED_NORMAL, &param);
758 }
759
760 static int ktimer_softirqd_should_run(unsigned int cpu)
761 {
762         return current->softirqs_raised;
763 }
764
765 #endif /* PREEMPT_RT_FULL */
766 /*
767  * Enter an interrupt context.
768  */
769 void irq_enter(void)
770 {
771         rcu_irq_enter();
772         if (is_idle_task(current) && !in_interrupt()) {
773                 /*
774                  * Prevent raise_softirq from needlessly waking up ksoftirqd
775                  * here, as softirq will be serviced on return from interrupt.
776                  */
777                 local_bh_disable_nort();
778                 tick_irq_enter();
779                 _local_bh_enable_nort();
780         }
781
782         __irq_enter();
783 }
784
785 static inline void invoke_softirq(void)
786 {
787 #ifndef CONFIG_PREEMPT_RT_FULL
788         if (!force_irqthreads) {
789 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
790                 /*
791                  * We can safely execute softirq on the current stack if
792                  * it is the irq stack, because it should be near empty
793                  * at this stage.
794                  */
795                 __do_softirq();
796 #else
797                 /*
798                  * Otherwise, irq_exit() is called on the task stack that can
799                  * be potentially deep already. So call softirq in its own stack
800                  * to prevent any overrun.
801                  */
802                 do_softirq_own_stack();
803 #endif
804         } else {
805                 wakeup_softirqd();
806         }
807 #else /* PREEMPT_RT_FULL */
808         unsigned long flags;
809
810         local_irq_save(flags);
811         if (__this_cpu_read(ksoftirqd) &&
812                         __this_cpu_read(ksoftirqd)->softirqs_raised)
813                 wakeup_softirqd();
814         if (__this_cpu_read(ktimer_softirqd) &&
815                         __this_cpu_read(ktimer_softirqd)->softirqs_raised)
816                 wakeup_timer_softirqd();
817         local_irq_restore(flags);
818 #endif
819 }
820
821 static inline void tick_irq_exit(void)
822 {
823 #ifdef CONFIG_NO_HZ_COMMON
824         int cpu = smp_processor_id();
825
826         /* Make sure that timer wheel updates are propagated */
827         if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
828                 if (!in_interrupt())
829                         tick_nohz_irq_exit();
830         }
831 #endif
832 }
833
834 /*
835  * Exit an interrupt context. Process softirqs if needed and possible:
836  */
837 void irq_exit(void)
838 {
839 #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
840         local_irq_disable();
841 #else
842         WARN_ON_ONCE(!irqs_disabled());
843 #endif
844
845         account_irq_exit_time(current);
846         preempt_count_sub(HARDIRQ_OFFSET);
847         if (!in_interrupt() && local_softirq_pending())
848                 invoke_softirq();
849
850         tick_irq_exit();
851         rcu_irq_exit();
852         trace_hardirq_exit(); /* must be last! */
853 }
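854 /*
854  * Typical bracketing done by the architecture's interrupt entry code
854  * (sketch only; the exact call chain is arch specific):
854  *
854  *      irq_enter();
854  *      generic_handle_irq(irq);        // run the hardirq handler
854  *      irq_exit();                     // softirqs may run from here
854  */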
854
855 void raise_softirq(unsigned int nr)
856 {
857         unsigned long flags;
858
859         local_irq_save(flags);
860         raise_softirq_irqoff(nr);
861         local_irq_restore(flags);
862 }
863
864 void open_softirq(int nr, void (*action)(struct softirq_action *))
865 {
866         softirq_vec[nr].action = action;
867 }
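868 /*
868  * Registration happens once at init time in the handful of core users,
868  * e.g. (sketch based on net/core/dev.c):
868  *
868  *      open_softirq(NET_TX_SOFTIRQ, net_tx_action);
868  *      open_softirq(NET_RX_SOFTIRQ, net_rx_action);
868  *
868  * The vector is later raised with raise_softirq(NET_RX_SOFTIRQ), or with
868  * __raise_softirq_irqoff() when interrupts are already disabled.
868  */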
868
869 /*
870  * Tasklets
871  */
872 struct tasklet_head {
873         struct tasklet_struct *head;
874         struct tasklet_struct **tail;
875 };
876
877 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
878 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
879
880 static inline void
881 __tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
882 {
883         if (tasklet_trylock(t)) {
884 again:
885                 /* We may have been preempted before tasklet_trylock
886                  * and __tasklet_action may have already run.
887                  * So double check the sched bit while the tasklet
888                  * is locked before adding it to the list.
889                  */
890                 if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
891                         t->next = NULL;
892                         *head->tail = t;
893                         head->tail = &(t->next);
894                         raise_softirq_irqoff(nr);
895                         tasklet_unlock(t);
896                 } else {
897                         /* This is subtle. If we hit the corner case above,
898                          * it is possible that we get preempted right here,
899                          * and another task has successfully called
900                          * tasklet_schedule(), then this function, and
901                          * failed on the trylock. Thus we must be sure
902                          * before releasing the tasklet lock, that the
903                          * SCHED_BIT is clear. Otherwise the tasklet
904                          * may get its SCHED_BIT set, but not added to the
905                          * list
906                          */
907                         if (!tasklet_tryunlock(t))
908                                 goto again;
909                 }
910         }
911 }
912
913 void __tasklet_schedule(struct tasklet_struct *t)
914 {
915         unsigned long flags;
916
917         local_irq_save(flags);
918         __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
919         local_irq_restore(flags);
920 }
921 EXPORT_SYMBOL(__tasklet_schedule);
922
923 void __tasklet_hi_schedule(struct tasklet_struct *t)
924 {
925         unsigned long flags;
926
927         local_irq_save(flags);
928         __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
929         local_irq_restore(flags);
930 }
931 EXPORT_SYMBOL(__tasklet_hi_schedule);
932
933 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
934 {
935         BUG_ON(!irqs_disabled());
936
937         __tasklet_hi_schedule(t);
938 }
939 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
940
941 void tasklet_enable(struct tasklet_struct *t)
942 {
943         if (!atomic_dec_and_test(&t->count))
944                 return;
945         if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
946                 tasklet_schedule(t);
947 }
948 EXPORT_SYMBOL(tasklet_enable);
949
950 static void __tasklet_action(struct softirq_action *a,
951                              struct tasklet_struct *list)
952 {
953         int loops = 1000000;
954
955         while (list) {
956                 struct tasklet_struct *t = list;
957
958                 list = list->next;
959
960                 /*
961                  * Should always succeed - after a tasklet got on the
962                  * list (after getting the SCHED bit set from 0 to 1),
963                  * nothing but the tasklet softirq it got queued to can
964                  * lock it:
965                  */
966                 if (!tasklet_trylock(t)) {
967                         WARN_ON(1);
968                         continue;
969                 }
970
971                 t->next = NULL;
972
973                 /*
974                  * If we cannot handle the tasklet because it's disabled,
975                  * mark it as pending. tasklet_enable() will later
976                  * re-schedule the tasklet.
977                  */
978                 if (unlikely(atomic_read(&t->count))) {
979 out_disabled:
980                         /* implicit unlock: */
981                         wmb();
982                         t->state = TASKLET_STATEF_PENDING;
983                         continue;
984                 }
985
986                 /*
987                  * From this point on the tasklet might be rescheduled
988                  * on another CPU, but it can only be added to another
989                  * CPU's tasklet list if we unlock the tasklet (which we
990                  * don't do yet).
991                  */
992                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
993                         WARN_ON(1);
994
995 again:
996                 t->func(t->data);
997
998                 /*
999                  * Try to unlock the tasklet. We must use cmpxchg, because
1000                  * another CPU might have scheduled or disabled the tasklet.
1001                  * We only allow the STATE_RUN -> 0 transition here.
1002                  */
1003                 while (!tasklet_tryunlock(t)) {
1004                         /*
1005                          * If it got disabled meanwhile, bail out:
1006                          */
1007                         if (atomic_read(&t->count))
1008                                 goto out_disabled;
1009                         /*
1010                          * If it got scheduled meanwhile, re-execute
1011                          * the tasklet function:
1012                          */
1013                         if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
1014                                 goto again;
1015                         if (!--loops) {
1016                                 printk("hm, tasklet state: %08lx\n", t->state);
1017                                 WARN_ON(1);
1018                                 tasklet_unlock(t);
1019                                 break;
1020                         }
1021                 }
1022         }
1023 }
1024
1025 static void tasklet_action(struct softirq_action *a)
1026 {
1027         struct tasklet_struct *list;
1028
1029         local_irq_disable();
1030
1031         list = __this_cpu_read(tasklet_vec.head);
1032         __this_cpu_write(tasklet_vec.head, NULL);
1033         __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
1034
1035         local_irq_enable();
1036
1037         __tasklet_action(a, list);
1038 }
1039
1040 static void tasklet_hi_action(struct softirq_action *a)
1041 {
1042         struct tasklet_struct *list;
1043
1044         local_irq_disable();
1045
1046         list = __this_cpu_read(tasklet_hi_vec.head);
1047         __this_cpu_write(tasklet_hi_vec.head, NULL);
1048         __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
1049
1050         local_irq_enable();
1051
1052         __tasklet_action(a, list);
1053 }
1054
1055 void tasklet_init(struct tasklet_struct *t,
1056                   void (*func)(unsigned long), unsigned long data)
1057 {
1058         t->next = NULL;
1059         t->state = 0;
1060         atomic_set(&t->count, 0);
1061         t->func = func;
1062         t->data = data;
1063 }
1064 EXPORT_SYMBOL(tasklet_init);
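1064 /*
1064  * Typical usage (sketch; my_tasklet and my_func are made-up names):
1064  *
1064  *      static void my_func(unsigned long data);
1064  *      static struct tasklet_struct my_tasklet;
1064  *
1064  *      tasklet_init(&my_tasklet, my_func, 0);
1064  *      ...
1064  *      tasklet_schedule(&my_tasklet);   // usually from an irq handler
1064  *
1064  * A static equivalent is DECLARE_TASKLET(my_tasklet, my_func, 0).
1064  */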
1065
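1065 /*
1065  * Wait until the tasklet is neither scheduled nor running, then clear
1065  * its state. May sleep, so this must be called from process context,
1065  * and the caller has to make sure the tasklet is not raised again
1065  * afterwards (typically by quiescing the interrupt source first).
1065  */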
1066 void tasklet_kill(struct tasklet_struct *t)
1067 {
1068         if (in_interrupt())
1069                 pr_notice("Attempt to kill tasklet from interrupt\n");
1070
1071         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
1072                 do {
1073                         msleep(1);
1074                 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
1075         }
1076         tasklet_unlock_wait(t);
1077         clear_bit(TASKLET_STATE_SCHED, &t->state);
1078 }
1079 EXPORT_SYMBOL(tasklet_kill);
1080
1081 /*
1082  * tasklet_hrtimer
1083  */
1084
1085 /*
1086  * The trampoline is called when the hrtimer expires. It schedules a tasklet
1087  * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
1088  * hrtimer callback, but from softirq context.
1089  */
1090 static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
1091 {
1092         struct tasklet_hrtimer *ttimer =
1093                 container_of(timer, struct tasklet_hrtimer, timer);
1094
1095         tasklet_hi_schedule(&ttimer->tasklet);
1096         return HRTIMER_NORESTART;
1097 }
1098
1099 /*
1100  * Helper function which calls the hrtimer callback from
1101  * tasklet/softirq context
1102  */
1103 static void __tasklet_hrtimer_trampoline(unsigned long data)
1104 {
1105         struct tasklet_hrtimer *ttimer = (void *)data;
1106         enum hrtimer_restart restart;
1107
1108         restart = ttimer->function(&ttimer->timer);
1109         if (restart != HRTIMER_NORESTART)
1110                 hrtimer_restart(&ttimer->timer);
1111 }
1112
1113 /**
1114  * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
1115  * @ttimer:      tasklet_hrtimer which is initialized
1116  * @function:    hrtimer callback function which gets called from softirq context
1117  * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
1118  * @mode:        hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
1119  */
1120 void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
1121                           enum hrtimer_restart (*function)(struct hrtimer *),
1122                           clockid_t which_clock, enum hrtimer_mode mode)
1123 {
1124         hrtimer_init(&ttimer->timer, which_clock, mode);
1125         ttimer->timer.function = __hrtimer_tasklet_trampoline;
1126         tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
1127                      (unsigned long)ttimer);
1128         ttimer->function = function;
1129 }
1130 EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
1131
1132 void __init softirq_init(void)
1133 {
1134         int cpu;
1135
1136         for_each_possible_cpu(cpu) {
1137                 per_cpu(tasklet_vec, cpu).tail =
1138                         &per_cpu(tasklet_vec, cpu).head;
1139                 per_cpu(tasklet_hi_vec, cpu).tail =
1140                         &per_cpu(tasklet_hi_vec, cpu).head;
1141         }
1142
1143         open_softirq(TASKLET_SOFTIRQ, tasklet_action);
1144         open_softirq(HI_SOFTIRQ, tasklet_hi_action);
1145 }
1146
1147 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
1148 void tasklet_unlock_wait(struct tasklet_struct *t)
1149 {
1150         while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
1151                 /*
1152                  * Hack for now to avoid this busy-loop:
1153                  */
1154 #ifdef CONFIG_PREEMPT_RT_FULL
1155                 msleep(1);
1156 #else
1157                 barrier();
1158 #endif
1159         }
1160 }
1161 EXPORT_SYMBOL(tasklet_unlock_wait);
1162 #endif
1163
1164 static int ksoftirqd_should_run(unsigned int cpu)
1165 {
1166         return ksoftirqd_softirq_pending();
1167 }
1168
1169 #ifdef CONFIG_HOTPLUG_CPU
1170 /*
1171  * tasklet_kill_immediate is called to remove a tasklet which can already be
1172  * scheduled for execution on @cpu.
1173  *
1174  * Unlike tasklet_kill, this function removes the tasklet
1175  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
1176  *
1177  * When this function is called, @cpu must be in the CPU_DEAD state.
1178  */
1179 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
1180 {
1181         struct tasklet_struct **i;
1182
1183         BUG_ON(cpu_online(cpu));
1184         BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
1185
1186         if (!test_bit(TASKLET_STATE_SCHED, &t->state))
1187                 return;
1188
1189         /* CPU is dead, so no lock needed. */
1190         for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
1191                 if (*i == t) {
1192                         *i = t->next;
1193                         /* If this was the tail element, move the tail ptr */
1194                         if (*i == NULL)
1195                                 per_cpu(tasklet_vec, cpu).tail = i;
1196                         return;
1197                 }
1198         }
1199         BUG();
1200 }
1201
1202 static void takeover_tasklets(unsigned int cpu)
1203 {
1204         /* CPU is dead, so no lock needed. */
1205         local_irq_disable();
1206
1207         /* Find end, append list for that CPU. */
1208         if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
1209                 *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
1210                 this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
1211                 per_cpu(tasklet_vec, cpu).head = NULL;
1212                 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
1213         }
1214         raise_softirq_irqoff(TASKLET_SOFTIRQ);
1215
1216         if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
1217                 *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
1218                 __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
1219                 per_cpu(tasklet_hi_vec, cpu).head = NULL;
1220                 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
1221         }
1222         raise_softirq_irqoff(HI_SOFTIRQ);
1223
1224         local_irq_enable();
1225 }
1226 #endif /* CONFIG_HOTPLUG_CPU */
1227
1228 static int cpu_callback(struct notifier_block *nfb, unsigned long action,
1229                         void *hcpu)
1230 {
1231         switch (action) {
1232 #ifdef CONFIG_HOTPLUG_CPU
1233         case CPU_DEAD:
1234         case CPU_DEAD_FROZEN:
1235                 takeover_tasklets((unsigned long)hcpu);
1236                 break;
1237 #endif /* CONFIG_HOTPLUG_CPU */
1238         }
1239         return NOTIFY_OK;
1240 }
1241
1242 static struct notifier_block cpu_nfb = {
1243         .notifier_call = cpu_callback
1244 };
1245
1246 static struct smp_hotplug_thread softirq_threads = {
1247         .store                  = &ksoftirqd,
1248         .setup                  = ksoftirqd_set_sched_params,
1249         .thread_should_run      = ksoftirqd_should_run,
1250         .thread_fn              = run_ksoftirqd,
1251         .thread_comm            = "ksoftirqd/%u",
1252 };
1253
1254 #ifdef CONFIG_PREEMPT_RT_FULL
1255 static struct smp_hotplug_thread softirq_timer_threads = {
1256         .store                  = &ktimer_softirqd,
1257         .setup                  = ktimer_softirqd_set_sched_params,
1258         .cleanup                = ktimer_softirqd_clr_sched_params,
1259         .thread_should_run      = ktimer_softirqd_should_run,
1260         .thread_fn              = run_ksoftirqd,
1261         .thread_comm            = "ktimersoftd/%u",
1262 };
1263 #endif
1264
1265 static __init int spawn_ksoftirqd(void)
1266 {
1267         register_cpu_notifier(&cpu_nfb);
1268
1269         BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
1270 #ifdef CONFIG_PREEMPT_RT_FULL
1271         BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
1272 #endif
1273
1274         return 0;
1275 }
1276 early_initcall(spawn_ksoftirqd);
1277
1278 /*
1279  * [ These __weak aliases are kept in a separate compilation unit, so that
1280  *   GCC does not inline them incorrectly. ]
1281  */
1282
1283 int __init __weak early_irq_init(void)
1284 {
1285         return 0;
1286 }
1287
1288 int __init __weak arch_probe_nr_irqs(void)
1289 {
1290         return NR_IRQS_LEGACY;
1291 }
1292
1293 int __init __weak arch_early_irq_init(void)
1294 {
1295         return 0;
1296 }
1297
1298 unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
1299 {
1300         return from;
1301 }