These changes are the raw update to linux-4.4.6-rt14. Kernel sources [...]

[kvmfornfv.git] / kernel / kernel / sched / rt.c
diff --git a/kernel/kernel/sched/rt.c b/kernel/kernel/sched/rt.c

index 637aa20..8cf360d 100644 (file)
--- a/kernel/kernel/sched/rt.c
+++ b/kernel/kernel/sched/rt.c
@@ -18,19 +18,22 @@ static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
  {
         struct rt_bandwidth *rt_b =
                 container_of(timer, struct rt_bandwidth, rt_period_timer);
-       ktime_t now;
-       int overrun;
         int idle = 0;
+       int overrun;
  
+       raw_spin_lock(&rt_b->rt_runtime_lock);
         for (;;) {
-               now = hrtimer_cb_get_time(timer);
-               overrun = hrtimer_forward(timer, now, rt_b->rt_period);
-
+               overrun = hrtimer_forward_now(timer, rt_b->rt_period);
                 if (!overrun)
                         break;
  
+               raw_spin_unlock(&rt_b->rt_runtime_lock);
                 idle = do_sched_rt_period_timer(rt_b, overrun);
+               raw_spin_lock(&rt_b->rt_runtime_lock);
         }
+       if (idle)
+               rt_b->rt_period_active = 0;
+       raw_spin_unlock(&rt_b->rt_runtime_lock);
  
         return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
  }
@@ -53,15 +56,16 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
         if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
                 return;
  
-       if (hrtimer_active(&rt_b->rt_period_timer))
-               return;
-
         raw_spin_lock(&rt_b->rt_runtime_lock);
-       start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
+       if (!rt_b->rt_period_active) {
+               rt_b->rt_period_active = 1;
+               hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
+               hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+       }
         raw_spin_unlock(&rt_b->rt_runtime_lock);
  }
  
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(HAVE_RT_PUSH_IPI)
  static void push_irq_work_func(struct irq_work *work);
  #endif
  
@@ -258,7 +262,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
  
  #ifdef CONFIG_SMP
  
-static int pull_rt_task(struct rq *this_rq);
+static void pull_rt_task(struct rq *this_rq);
  
  static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
  {
@@ -324,7 +328,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
         rt_rq = &rq_of_rt_rq(rt_rq)->rt;
  
         rt_rq->rt_nr_total++;
-       if (p->nr_cpus_allowed > 1)
+       if (tsk_nr_cpus_allowed(p) > 1)
                 rt_rq->rt_nr_migratory++;
  
         update_rt_migration(rt_rq);
@@ -341,7 +345,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
         rt_rq = &rq_of_rt_rq(rt_rq)->rt;
  
         rt_rq->rt_nr_total--;
-       if (p->nr_cpus_allowed > 1)
+       if (tsk_nr_cpus_allowed(p) > 1)
                 rt_rq->rt_nr_migratory--;
  
         update_rt_migration(rt_rq);
@@ -352,13 +356,23 @@ static inline int has_pushable_tasks(struct rq *rq)
         return !plist_head_empty(&rq->rt.pushable_tasks);
  }
  
-static inline void set_post_schedule(struct rq *rq)
+static DEFINE_PER_CPU(struct callback_head, rt_push_head);
+static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
+
+static void push_rt_tasks(struct rq *);
+static void pull_rt_task(struct rq *);
+
+static inline void queue_push_tasks(struct rq *rq)
  {
-       /*
-        * We detect this state here so that we can avoid taking the RQ
-        * lock again later if there is no need to push
-        */
-       rq->post_schedule = has_pushable_tasks(rq);
+       if (!has_pushable_tasks(rq))
+               return;
+
+       queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
+}
+
+static inline void queue_pull_task(struct rq *rq)
+{
+       queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
  }
  
  static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
@@ -410,12 +424,11 @@ static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
         return false;
  }
  
-static inline int pull_rt_task(struct rq *this_rq)
+static inline void pull_rt_task(struct rq *this_rq)
  {
-       return 0;
  }
  
-static inline void set_post_schedule(struct rq *rq)
+static inline void queue_push_tasks(struct rq *rq)
  {
  }
  #endif /* CONFIG_SMP */
@@ -624,11 +637,11 @@ bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
  /*
   * We ran out of runtime, see if we can borrow some from our neighbours.
   */
-static int do_balance_runtime(struct rt_rq *rt_rq)
+static void do_balance_runtime(struct rt_rq *rt_rq)
  {
         struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
         struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
-       int i, weight, more = 0;
+       int i, weight;
         u64 rt_period;
  
         weight = cpumask_weight(rd->span);
@@ -662,7 +675,6 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
                                 diff = rt_period - rt_rq->rt_runtime;
                         iter->rt_runtime -= diff;
                         rt_rq->rt_runtime += diff;
-                       more = 1;
                         if (rt_rq->rt_runtime == rt_period) {
                                 raw_spin_unlock(&iter->rt_runtime_lock);
                                 break;
@@ -672,8 +684,6 @@ next:
                 raw_spin_unlock(&iter->rt_runtime_lock);
         }
         raw_spin_unlock(&rt_b->rt_runtime_lock);
-
-       return more;
  }
  
  /*
@@ -785,26 +795,19 @@ static void __enable_runtime(struct rq *rq)
         }
  }
  
-static int balance_runtime(struct rt_rq *rt_rq)
+static void balance_runtime(struct rt_rq *rt_rq)
  {
-       int more = 0;
-
         if (!sched_feat(RT_RUNTIME_SHARE))
-               return more;
+               return;
  
         if (rt_rq->rt_time > rt_rq->rt_runtime) {
                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
-               more = do_balance_runtime(rt_rq);
+               do_balance_runtime(rt_rq);
                 raw_spin_lock(&rt_rq->rt_runtime_lock);
         }
-
-       return more;
  }
  #else /* !CONFIG_SMP */
-static inline int balance_runtime(struct rt_rq *rt_rq)
-{
-       return 0;
-}
+static inline void balance_runtime(struct rt_rq *rt_rq) {}
  #endif /* CONFIG_SMP */
  
  static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
@@ -1261,7 +1264,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
  
         enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
  
-       if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+       if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
                 enqueue_pushable_task(rq, p);
  }
  
@@ -1325,7 +1328,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
         rq = cpu_rq(cpu);
  
         rcu_read_lock();
-       curr = ACCESS_ONCE(rq->curr); /* unlocked access */
+       curr = READ_ONCE(rq->curr); /* unlocked access */
  
         /*
          * If the current task on @p's runqueue is an RT task, then
@@ -1350,7 +1353,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
          * will have to sort it out.
          */
         if (curr && unlikely(rt_task(curr)) &&
-           (curr->nr_cpus_allowed < 2 ||
+           (tsk_nr_cpus_allowed(curr) < 2 ||
              curr->prio <= p->prio)) {
                 int target = find_lowest_rq(p);
  
@@ -1374,7 +1377,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
          * Current can't be migrated, useless to reschedule,
          * let's hope p can move out.
          */
-       if (rq->curr->nr_cpus_allowed == 1 ||
+       if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
             !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
                 return;
  
@@ -1382,7 +1385,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
          * p is migratable, so let's not schedule it and
          * see if it is pushed or pulled somewhere else.
          */
-       if (p->nr_cpus_allowed != 1
+       if (tsk_nr_cpus_allowed(p) != 1
             && cpupri_find(&rq->rd->cpupri, p, NULL))
                 return;
  
@@ -1467,7 +1470,15 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
         struct rt_rq *rt_rq = &rq->rt;
  
         if (need_pull_rt_task(rq, prev)) {
+               /*
+                * This is OK, because current is on_cpu, which avoids it being
+                * picked for load-balance and preemption/IRQs are still
+                * disabled avoiding further scheduler activity on it and we're
+                * being very careful to re-start the picking loop.
+                */
+               lockdep_unpin_lock(&rq->lock);
                 pull_rt_task(rq);
+               lockdep_pin_lock(&rq->lock);
                 /*
                  * pull_rt_task() can drop (and re-acquire) rq->lock; this
                  * means a dl or stop task can slip in, in which case we need
@@ -1495,7 +1506,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
         /* The running task is never eligible for pushing */
         dequeue_pushable_task(rq, p);
  
-       set_post_schedule(rq);
+       queue_push_tasks(rq);
  
         return p;
  }
@@ -1508,7 +1519,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
          * The previous task needs to be made eligible for pushing
          * if it is still active
          */
-       if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
+       if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1)
                 enqueue_pushable_task(rq, p);
  }
  
@@ -1558,7 +1569,7 @@ static int find_lowest_rq(struct task_struct *task)
         if (unlikely(!lowest_mask))
                 return -1;
  
-       if (task->nr_cpus_allowed == 1)
+       if (tsk_nr_cpus_allowed(task) == 1)
                 return -1; /* No other targets possible */
  
         if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
@@ -1690,7 +1701,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
  
         BUG_ON(rq->cpu != task_cpu(p));
         BUG_ON(task_current(rq, p));
-       BUG_ON(p->nr_cpus_allowed <= 1);
+       BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
  
         BUG_ON(!task_on_rq_queued(p));
         BUG_ON(!rt_task(p));
@@ -1950,14 +1961,15 @@ static void push_irq_work_func(struct irq_work *work)
  }
  #endif /* HAVE_RT_PUSH_IPI */
  
-static int pull_rt_task(struct rq *this_rq)
+static void pull_rt_task(struct rq *this_rq)
  {
-       int this_cpu = this_rq->cpu, ret = 0, cpu;
+       int this_cpu = this_rq->cpu, cpu;
+       bool resched = false;
         struct task_struct *p;
         struct rq *src_rq;
  
         if (likely(!rt_overloaded(this_rq)))
-               return 0;
+               return;
  
         /*
          * Match the barrier from rt_set_overloaded; this guarantees that if we
@@ -1968,7 +1980,7 @@ static int pull_rt_task(struct rq *this_rq)
  #ifdef HAVE_RT_PUSH_IPI
         if (sched_feat(RT_PUSH_IPI)) {
                 tell_cpu_to_push(this_rq);
-               return 0;
+               return;
         }
  #endif
  
@@ -2021,7 +2033,7 @@ static int pull_rt_task(struct rq *this_rq)
                         if (p->prio < src_rq->curr->prio)
                                 goto skip;
  
-                       ret = 1;
+                       resched = true;
  
                         deactivate_task(src_rq, p, 0);
                         set_task_cpu(p, this_cpu);
@@ -2037,12 +2049,8 @@ skip:
                 double_unlock_balance(this_rq, src_rq);
         }
  
-       return ret;
-}
-
-static void post_schedule_rt(struct rq *rq)
-{
-       push_rt_tasks(rq);
+       if (resched)
+               resched_curr(this_rq);
  }
  
  /*
@@ -2053,53 +2061,13 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
  {
         if (!task_running(rq, p) &&
             !test_tsk_need_resched(rq->curr) &&
-           has_pushable_tasks(rq) &&
-           p->nr_cpus_allowed > 1 &&
+           tsk_nr_cpus_allowed(p) > 1 &&
             (dl_task(rq->curr) || rt_task(rq->curr)) &&
-           (rq->curr->nr_cpus_allowed < 2 ||
+           (tsk_nr_cpus_allowed(rq->curr) < 2 ||
              rq->curr->prio <= p->prio))
                 push_rt_tasks(rq);
  }
  
-static void set_cpus_allowed_rt(struct task_struct *p,
-                               const struct cpumask *new_mask)
-{
-       struct rq *rq;
-       int weight;
-
-       BUG_ON(!rt_task(p));
-
-       if (!task_on_rq_queued(p))
-               return;
-
-       weight = cpumask_weight(new_mask);
-
-       /*
-        * Only update if the process changes its state from whether it
-        * can migrate or not.
-        */
-       if ((p->nr_cpus_allowed > 1) == (weight > 1))
-               return;
-
-       rq = task_rq(p);
-
-       /*
-        * The process used to be able to migrate OR it can now migrate
-        */
-       if (weight <= 1) {
-               if (!task_current(rq, p))
-                       dequeue_pushable_task(rq, p);
-               BUG_ON(!rq->rt.rt_nr_migratory);
-               rq->rt.rt_nr_migratory--;
-       } else {
-               if (!task_current(rq, p))
-                       enqueue_pushable_task(rq, p);
-               rq->rt.rt_nr_migratory++;
-       }
-
-       update_rt_migration(&rq->rt);
-}
-
  /* Assumes rq->lock is held */
  static void rq_online_rt(struct rq *rq)
  {
@@ -2138,8 +2106,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
         if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
                 return;
  
-       if (pull_rt_task(rq))
-               resched_curr(rq);
+       queue_pull_task(rq);
  }
  
  void __init init_sched_rt_class(void)
@@ -2160,8 +2127,6 @@ void __init init_sched_rt_class(void)
   */
  static void switched_to_rt(struct rq *rq, struct task_struct *p)
  {
-       int check_resched = 1;
-
         /*
          * If we are already running, then there's nothing
          * that needs to be done. But if we are not running
@@ -2171,13 +2136,12 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
          */
         if (task_on_rq_queued(p) && rq->curr != p) {
  #ifdef CONFIG_SMP
-               if (p->nr_cpus_allowed > 1 && rq->rt.overloaded &&
-                   /* Don't resched if we changed runqueues */
-                   push_rt_task(rq) && rq != task_rq(p))
-                       check_resched = 0;
-#endif /* CONFIG_SMP */
-               if (check_resched && p->prio < rq->curr->prio)
+               if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
+                       queue_push_tasks(rq);
+#else
+               if (p->prio < rq->curr->prio)
                         resched_curr(rq);
+#endif /* CONFIG_SMP */
         }
  }
  
@@ -2198,14 +2162,13 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
                  * may need to pull tasks to this runqueue.
                  */
                 if (oldprio < p->prio)
-                       pull_rt_task(rq);
+                       queue_pull_task(rq);
+
                 /*
                  * If there's a higher priority task waiting to run
-                * then reschedule. Note, the above pull_rt_task
-                * can release the rq lock and p could migrate.
-                * Only reschedule if p is still on the same runqueue.
+                * then reschedule.
                  */
-               if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
+               if (p->prio > rq->rt.highest_prio.curr)
                         resched_curr(rq);
  #else
                 /* For UP simply resched on drop of prio */
@@ -2313,10 +2276,9 @@ const struct sched_class rt_sched_class = {
  #ifdef CONFIG_SMP
         .select_task_rq         = select_task_rq_rt,
  
-       .set_cpus_allowed       = set_cpus_allowed_rt,
+       .set_cpus_allowed       = set_cpus_allowed_common,
         .rq_online              = rq_online_rt,
         .rq_offline             = rq_offline_rt,
-       .post_schedule          = post_schedule_rt,
         .task_woken             = task_woken_rt,
         .switched_from          = switched_from_rt,
  #endif