These changes are the raw update to linux-4.4.6-rt14. Kernel sources [...]
[kvmfornfv.git] / kernel / kernel / sched / rt.c
index 637aa20..8cf360d 100644 (file)
@@ -18,19 +18,22 @@ static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
 {
        struct rt_bandwidth *rt_b =
                container_of(timer, struct rt_bandwidth, rt_period_timer);
-       ktime_t now;
-       int overrun;
        int idle = 0;
+       int overrun;
 
+       raw_spin_lock(&rt_b->rt_runtime_lock);
        for (;;) {
-               now = hrtimer_cb_get_time(timer);
-               overrun = hrtimer_forward(timer, now, rt_b->rt_period);
-
+               overrun = hrtimer_forward_now(timer, rt_b->rt_period);
                if (!overrun)
                        break;
 
+               raw_spin_unlock(&rt_b->rt_runtime_lock);
                idle = do_sched_rt_period_timer(rt_b, overrun);
+               raw_spin_lock(&rt_b->rt_runtime_lock);
        }
+       if (idle)
+               rt_b->rt_period_active = 0;
+       raw_spin_unlock(&rt_b->rt_runtime_lock);
 
        return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
 }
@@ -53,15 +56,16 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
        if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
                return;
 
-       if (hrtimer_active(&rt_b->rt_period_timer))
-               return;
-
        raw_spin_lock(&rt_b->rt_runtime_lock);
-       start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
+       if (!rt_b->rt_period_active) {
+               rt_b->rt_period_active = 1;
+               hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
+               hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+       }
        raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(HAVE_RT_PUSH_IPI)
 static void push_irq_work_func(struct irq_work *work);
 #endif
 
@@ -258,7 +262,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 
 #ifdef CONFIG_SMP
 
-static int pull_rt_task(struct rq *this_rq);
+static void pull_rt_task(struct rq *this_rq);
 
 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
 {
@@ -324,7 +328,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
        rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 
        rt_rq->rt_nr_total++;
-       if (p->nr_cpus_allowed > 1)
+       if (tsk_nr_cpus_allowed(p) > 1)
                rt_rq->rt_nr_migratory++;
 
        update_rt_migration(rt_rq);
@@ -341,7 +345,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
        rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 
        rt_rq->rt_nr_total--;
-       if (p->nr_cpus_allowed > 1)
+       if (tsk_nr_cpus_allowed(p) > 1)
                rt_rq->rt_nr_migratory--;
 
        update_rt_migration(rt_rq);
@@ -352,13 +356,23 @@ static inline int has_pushable_tasks(struct rq *rq)
        return !plist_head_empty(&rq->rt.pushable_tasks);
 }
 
-static inline void set_post_schedule(struct rq *rq)
+static DEFINE_PER_CPU(struct callback_head, rt_push_head);
+static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
+
+static void push_rt_tasks(struct rq *);
+static void pull_rt_task(struct rq *);
+
+static inline void queue_push_tasks(struct rq *rq)
 {
-       /*
-        * We detect this state here so that we can avoid taking the RQ
-        * lock again later if there is no need to push
-        */
-       rq->post_schedule = has_pushable_tasks(rq);
+       if (!has_pushable_tasks(rq))
+               return;
+
+       queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
+}
+
+static inline void queue_pull_task(struct rq *rq)
+{
+       queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
 }
 
 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
@@ -410,12 +424,11 @@ static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
        return false;
 }
 
-static inline int pull_rt_task(struct rq *this_rq)
+static inline void pull_rt_task(struct rq *this_rq)
 {
-       return 0;
 }
 
-static inline void set_post_schedule(struct rq *rq)
+static inline void queue_push_tasks(struct rq *rq)
 {
 }
 #endif /* CONFIG_SMP */
@@ -624,11 +637,11 @@ bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
 /*
  * We ran out of runtime, see if we can borrow some from our neighbours.
  */
-static int do_balance_runtime(struct rt_rq *rt_rq)
+static void do_balance_runtime(struct rt_rq *rt_rq)
 {
        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
        struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
-       int i, weight, more = 0;
+       int i, weight;
        u64 rt_period;
 
        weight = cpumask_weight(rd->span);
@@ -662,7 +675,6 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
                                diff = rt_period - rt_rq->rt_runtime;
                        iter->rt_runtime -= diff;
                        rt_rq->rt_runtime += diff;
-                       more = 1;
                        if (rt_rq->rt_runtime == rt_period) {
                                raw_spin_unlock(&iter->rt_runtime_lock);
                                break;
@@ -672,8 +684,6 @@ next:
                raw_spin_unlock(&iter->rt_runtime_lock);
        }
        raw_spin_unlock(&rt_b->rt_runtime_lock);
-
-       return more;
 }
 
 /*
@@ -785,26 +795,19 @@ static void __enable_runtime(struct rq *rq)
        }
 }
 
-static int balance_runtime(struct rt_rq *rt_rq)
+static void balance_runtime(struct rt_rq *rt_rq)
 {
-       int more = 0;
-
        if (!sched_feat(RT_RUNTIME_SHARE))
-               return more;
+               return;
 
        if (rt_rq->rt_time > rt_rq->rt_runtime) {
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
-               more = do_balance_runtime(rt_rq);
+               do_balance_runtime(rt_rq);
                raw_spin_lock(&rt_rq->rt_runtime_lock);
        }
-
-       return more;
 }
 #else /* !CONFIG_SMP */
-static inline int balance_runtime(struct rt_rq *rt_rq)
-{
-       return 0;
-}
+static inline void balance_runtime(struct rt_rq *rt_rq) {}
 #endif /* CONFIG_SMP */
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
@@ -1261,7 +1264,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
        enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
 
-       if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+       if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
                enqueue_pushable_task(rq, p);
 }
 
@@ -1325,7 +1328,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
        rq = cpu_rq(cpu);
 
        rcu_read_lock();
-       curr = ACCESS_ONCE(rq->curr); /* unlocked access */
+       curr = READ_ONCE(rq->curr); /* unlocked access */
 
        /*
         * If the current task on @p's runqueue is an RT task, then
@@ -1350,7 +1353,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
         * will have to sort it out.
         */
        if (curr && unlikely(rt_task(curr)) &&
-           (curr->nr_cpus_allowed < 2 ||
+           (tsk_nr_cpus_allowed(curr) < 2 ||
             curr->prio <= p->prio)) {
                int target = find_lowest_rq(p);
 
@@ -1374,7 +1377,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
         * Current can't be migrated, useless to reschedule,
         * let's hope p can move out.
         */
-       if (rq->curr->nr_cpus_allowed == 1 ||
+       if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
            !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
                return;
 
@@ -1382,7 +1385,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
         * p is migratable, so let's not schedule it and
         * see if it is pushed or pulled somewhere else.
         */
-       if (p->nr_cpus_allowed != 1
+       if (tsk_nr_cpus_allowed(p) != 1
            && cpupri_find(&rq->rd->cpupri, p, NULL))
                return;
 
@@ -1467,7 +1470,15 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
        struct rt_rq *rt_rq = &rq->rt;
 
        if (need_pull_rt_task(rq, prev)) {
+               /*
+                * This is OK, because current is on_cpu, which avoids it being
+                * picked for load-balance and preemption/IRQs are still
+                * disabled avoiding further scheduler activity on it and we're
+                * being very careful to re-start the picking loop.
+                */
+               lockdep_unpin_lock(&rq->lock);
                pull_rt_task(rq);
+               lockdep_pin_lock(&rq->lock);
                /*
                 * pull_rt_task() can drop (and re-acquire) rq->lock; this
                 * means a dl or stop task can slip in, in which case we need
@@ -1495,7 +1506,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
        /* The running task is never eligible for pushing */
        dequeue_pushable_task(rq, p);
 
-       set_post_schedule(rq);
+       queue_push_tasks(rq);
 
        return p;
 }
@@ -1508,7 +1519,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
         * The previous task needs to be made eligible for pushing
         * if it is still active
         */
-       if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
+       if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1)
                enqueue_pushable_task(rq, p);
 }
 
@@ -1558,7 +1569,7 @@ static int find_lowest_rq(struct task_struct *task)
        if (unlikely(!lowest_mask))
                return -1;
 
-       if (task->nr_cpus_allowed == 1)
+       if (tsk_nr_cpus_allowed(task) == 1)
                return -1; /* No other targets possible */
 
        if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
@@ -1690,7 +1701,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
 
        BUG_ON(rq->cpu != task_cpu(p));
        BUG_ON(task_current(rq, p));
-       BUG_ON(p->nr_cpus_allowed <= 1);
+       BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
 
        BUG_ON(!task_on_rq_queued(p));
        BUG_ON(!rt_task(p));
@@ -1950,14 +1961,15 @@ static void push_irq_work_func(struct irq_work *work)
 }
 #endif /* HAVE_RT_PUSH_IPI */
 
-static int pull_rt_task(struct rq *this_rq)
+static void pull_rt_task(struct rq *this_rq)
 {
-       int this_cpu = this_rq->cpu, ret = 0, cpu;
+       int this_cpu = this_rq->cpu, cpu;
+       bool resched = false;
        struct task_struct *p;
        struct rq *src_rq;
 
        if (likely(!rt_overloaded(this_rq)))
-               return 0;
+               return;
 
        /*
         * Match the barrier from rt_set_overloaded; this guarantees that if we
@@ -1968,7 +1980,7 @@ static int pull_rt_task(struct rq *this_rq)
 #ifdef HAVE_RT_PUSH_IPI
        if (sched_feat(RT_PUSH_IPI)) {
                tell_cpu_to_push(this_rq);
-               return 0;
+               return;
        }
 #endif
 
@@ -2021,7 +2033,7 @@ static int pull_rt_task(struct rq *this_rq)
                        if (p->prio < src_rq->curr->prio)
                                goto skip;
 
-                       ret = 1;
+                       resched = true;
 
                        deactivate_task(src_rq, p, 0);
                        set_task_cpu(p, this_cpu);
@@ -2037,12 +2049,8 @@ skip:
                double_unlock_balance(this_rq, src_rq);
        }
 
-       return ret;
-}
-
-static void post_schedule_rt(struct rq *rq)
-{
-       push_rt_tasks(rq);
+       if (resched)
+               resched_curr(this_rq);
 }
 
 /*
@@ -2053,53 +2061,13 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
 {
        if (!task_running(rq, p) &&
            !test_tsk_need_resched(rq->curr) &&
-           has_pushable_tasks(rq) &&
-           p->nr_cpus_allowed > 1 &&
+           tsk_nr_cpus_allowed(p) > 1 &&
            (dl_task(rq->curr) || rt_task(rq->curr)) &&
-           (rq->curr->nr_cpus_allowed < 2 ||
+           (tsk_nr_cpus_allowed(rq->curr) < 2 ||
             rq->curr->prio <= p->prio))
                push_rt_tasks(rq);
 }
 
-static void set_cpus_allowed_rt(struct task_struct *p,
-                               const struct cpumask *new_mask)
-{
-       struct rq *rq;
-       int weight;
-
-       BUG_ON(!rt_task(p));
-
-       if (!task_on_rq_queued(p))
-               return;
-
-       weight = cpumask_weight(new_mask);
-
-       /*
-        * Only update if the process changes its state from whether it
-        * can migrate or not.
-        */
-       if ((p->nr_cpus_allowed > 1) == (weight > 1))
-               return;
-
-       rq = task_rq(p);
-
-       /*
-        * The process used to be able to migrate OR it can now migrate
-        */
-       if (weight <= 1) {
-               if (!task_current(rq, p))
-                       dequeue_pushable_task(rq, p);
-               BUG_ON(!rq->rt.rt_nr_migratory);
-               rq->rt.rt_nr_migratory--;
-       } else {
-               if (!task_current(rq, p))
-                       enqueue_pushable_task(rq, p);
-               rq->rt.rt_nr_migratory++;
-       }
-
-       update_rt_migration(&rq->rt);
-}
-
 /* Assumes rq->lock is held */
 static void rq_online_rt(struct rq *rq)
 {
@@ -2138,8 +2106,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
        if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
                return;
 
-       if (pull_rt_task(rq))
-               resched_curr(rq);
+       queue_pull_task(rq);
 }
 
 void __init init_sched_rt_class(void)
@@ -2160,8 +2127,6 @@ void __init init_sched_rt_class(void)
  */
 static void switched_to_rt(struct rq *rq, struct task_struct *p)
 {
-       int check_resched = 1;
-
        /*
         * If we are already running, then there's nothing
         * that needs to be done. But if we are not running
@@ -2171,13 +2136,12 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
         */
        if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
-               if (p->nr_cpus_allowed > 1 && rq->rt.overloaded &&
-                   /* Don't resched if we changed runqueues */
-                   push_rt_task(rq) && rq != task_rq(p))
-                       check_resched = 0;
-#endif /* CONFIG_SMP */
-               if (check_resched && p->prio < rq->curr->prio)
+               if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
+                       queue_push_tasks(rq);
+#else
+               if (p->prio < rq->curr->prio)
                        resched_curr(rq);
+#endif /* CONFIG_SMP */
        }
 }
 
@@ -2198,14 +2162,13 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
                 * may need to pull tasks to this runqueue.
                 */
                if (oldprio < p->prio)
-                       pull_rt_task(rq);
+                       queue_pull_task(rq);
+
                /*
                 * If there's a higher priority task waiting to run
-                * then reschedule. Note, the above pull_rt_task
-                * can release the rq lock and p could migrate.
-                * Only reschedule if p is still on the same runqueue.
+                * then reschedule.
                 */
-               if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
+               if (p->prio > rq->rt.highest_prio.curr)
                        resched_curr(rq);
 #else
                /* For UP simply resched on drop of prio */
@@ -2313,10 +2276,9 @@ const struct sched_class rt_sched_class = {
 #ifdef CONFIG_SMP
        .select_task_rq         = select_task_rq_rt,
 
-       .set_cpus_allowed       = set_cpus_allowed_rt,
+       .set_cpus_allowed       = set_cpus_allowed_common,
        .rq_online              = rq_online_rt,
        .rq_offline             = rq_offline_rt,
-       .post_schedule          = post_schedule_rt,
        .task_woken             = task_woken_rt,
        .switched_from          = switched_from_rt,
 #endif