These changes are the raw update to linux-4.4.6-rt14. Kernel sources ...

[kvmfornfv.git] / kernel / kernel / sched / sched.h
diff --git a/kernel/kernel/sched/sched.h b/kernel/kernel/sched/sched.h

index 308f664..7dd5f53 100644 (file)
--- a/kernel/kernel/sched/sched.h
+++ b/kernel/kernel/sched/sched.h
@@ -26,8 +26,14 @@ extern __read_mostly int scheduler_running;
  extern unsigned long calc_load_update;
  extern atomic_long_t calc_load_tasks;
  
+extern void calc_global_load_tick(struct rq *this_rq);
  extern long calc_load_fold_active(struct rq *this_rq);
+
+#ifdef CONFIG_SMP
  extern void update_cpu_load_active(struct rq *this_rq);
+#else
+static inline void update_cpu_load_active(struct rq *this_rq) { }
+#endif
  
  /*
   * Helpers for converting nanosecond timing to jiffy resolution
@@ -78,6 +84,10 @@ extern void update_cpu_load_active(struct rq *this_rq);
   */
  #define RUNTIME_INF    ((u64)~0ULL)
  
+static inline int idle_policy(int policy)
+{
+       return policy == SCHED_IDLE;
+}
  static inline int fair_policy(int policy)
  {
         return policy == SCHED_NORMAL || policy == SCHED_BATCH;
@@ -92,6 +102,11 @@ static inline int dl_policy(int policy)
  {
         return policy == SCHED_DEADLINE;
  }
+static inline bool valid_policy(int policy)
+{
+       return idle_policy(policy) || fair_policy(policy) ||
+               rt_policy(policy) || dl_policy(policy);
+}
  
  static inline int task_has_rt_policy(struct task_struct *p)
  {
@@ -103,11 +118,6 @@ static inline int task_has_dl_policy(struct task_struct *p)
         return dl_policy(p->policy);
  }
  
-static inline bool dl_time_before(u64 a, u64 b)
-{
-       return (s64)(a - b) < 0;
-}
-
  /*
   * Tells if entity @a should preempt entity @b.
   */
@@ -131,6 +141,7 @@ struct rt_bandwidth {
         ktime_t                 rt_period;
         u64                     rt_runtime;
         struct hrtimer          rt_period_timer;
+       unsigned int            rt_period_active;
  };
  
  void __dl_clear_params(struct task_struct *p);
@@ -215,7 +226,7 @@ struct cfs_bandwidth {
         s64 hierarchical_quota;
         u64 runtime_expires;
  
-       int idle, timer_active;
+       int idle, period_active;
         struct hrtimer period_timer, slack_timer;
         struct list_head throttled_cfs_rq;
  
@@ -238,7 +249,6 @@ struct task_group {
  
  #ifdef CONFIG_SMP
         atomic_long_t load_avg;
-       atomic_t runnable_avg;
  #endif
  #endif
  
@@ -306,7 +316,7 @@ extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
  extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
  
  extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
-extern void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b, bool force);
+extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
  extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
  
  extern void free_rt_sched_group(struct task_group *tg);
@@ -359,27 +369,20 @@ struct cfs_rq {
  
  #ifdef CONFIG_SMP
         /*
-        * CFS Load tracking
-        * Under CFS, load is tracked on a per-entity basis and aggregated up.
-        * This allows for the description of both thread and group usage (in
-        * the FAIR_GROUP_SCHED case).
-        * runnable_load_avg is the sum of the load_avg_contrib of the
-        * sched_entities on the rq.
-        * blocked_load_avg is similar to runnable_load_avg except that its
-        * the blocked sched_entities on the rq.
-        * utilization_load_avg is the sum of the average running time of the
-        * sched_entities on the rq.
+        * CFS load tracking
          */
-       unsigned long runnable_load_avg, blocked_load_avg, utilization_load_avg;
-       atomic64_t decay_counter;
-       u64 last_decay;
-       atomic_long_t removed_load;
-
+       struct sched_avg avg;
+       u64 runnable_load_sum;
+       unsigned long runnable_load_avg;
  #ifdef CONFIG_FAIR_GROUP_SCHED
-       /* Required to track per-cpu representation of a task_group */
-       u32 tg_runnable_contrib;
-       unsigned long tg_load_contrib;
+       unsigned long tg_load_avg_contrib;
+#endif
+       atomic_long_t removed_load_avg, removed_util_avg;
+#ifndef CONFIG_64BIT
+       u64 load_last_update_time_copy;
+#endif
  
+#ifdef CONFIG_FAIR_GROUP_SCHED
         /*
          *   h_load = weight * f(tg)
          *
@@ -588,8 +591,6 @@ struct rq {
  #ifdef CONFIG_FAIR_GROUP_SCHED
         /* list of leaf cfs_rq on this cpu: */
         struct list_head leaf_cfs_rq_list;
-
-       struct sched_avg avg;
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  
         /*
@@ -617,9 +618,10 @@ struct rq {
         unsigned long cpu_capacity;
         unsigned long cpu_capacity_orig;
  
+       struct callback_head *balance_callback;
+
         unsigned char idle_balance;
         /* For active balancing */
-       int post_schedule;
         int active_balance;
         int push_cpu;
         struct cpu_stop_work active_balance_work;
@@ -707,7 +709,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
  
  static inline u64 __rq_clock_broken(struct rq *rq)
  {
-       return ACCESS_ONCE(rq->clock);
+       return READ_ONCE(rq->clock);
  }
  
  static inline u64 rq_clock(struct rq *rq)
@@ -760,6 +762,21 @@ extern int migrate_swap(struct task_struct *, struct task_struct *);
  
  #ifdef CONFIG_SMP
  
+static inline void
+queue_balance_callback(struct rq *rq,
+                      struct callback_head *head,
+                      void (*func)(struct rq *rq))
+{
+       lockdep_assert_held(&rq->lock);
+
+       if (unlikely(head->next))
+               return;
+
+       head->func = (void (*)(struct callback_head *))func;
+       head->next = rq->balance_callback;
+       rq->balance_callback = head;
+}
+
  extern void sched_ttwu_pending(void);
  
  #define rcu_dereference_check_sched_domain(p) \
@@ -990,17 +1007,7 @@ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
  #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
  #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
  
-#ifdef CONFIG_NUMA_BALANCING
-#define sched_feat_numa(x) sched_feat(x)
-#ifdef CONFIG_SCHED_DEBUG
-#define numabalancing_enabled sched_feat_numa(NUMA)
-#else
-extern bool numabalancing_enabled;
-#endif /* CONFIG_SCHED_DEBUG */
-#else
-#define sched_feat_numa(x) (0)
-#define numabalancing_enabled (0)
-#endif /* CONFIG_NUMA_BALANCING */
+extern struct static_key_false sched_numa_balancing;
  
  static inline u64 global_rt_period(void)
  {
@@ -1042,9 +1049,6 @@ static inline int task_on_rq_migrating(struct task_struct *p)
  #ifndef prepare_arch_switch
  # define prepare_arch_switch(next)     do { } while (0)
  #endif
-#ifndef finish_arch_switch
-# define finish_arch_switch(prev)      do { } while (0)
-#endif
  #ifndef finish_arch_post_lock_switch
  # define finish_arch_post_lock_switch()        do { } while (0)
  #endif
@@ -1068,9 +1072,13 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
          * After ->on_cpu is cleared, the task can be moved to a different CPU.
          * We must ensure this doesn't happen until the switch is completely
          * finished.
+        *
+        * In particular, the load of prev->state in finish_task_switch() must
+        * happen before this.
+        *
+        * Pairs with the control dependency and rmb in try_to_wake_up().
          */
-       smp_wmb();
-       prev->on_cpu = 0;
+       smp_store_release(&prev->on_cpu, 0);
  #endif
  #ifdef CONFIG_DEBUG_SPINLOCK
         /* this is a valid case when another task releases the spinlock */
@@ -1147,16 +1155,18 @@ static const u32 prio_to_wmult[40] = {
   /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
  };
  
-#define ENQUEUE_WAKEUP         1
-#define ENQUEUE_HEAD           2
+#define ENQUEUE_WAKEUP         0x01
+#define ENQUEUE_HEAD           0x02
  #ifdef CONFIG_SMP
-#define ENQUEUE_WAKING         4       /* sched_class::task_waking was called */
+#define ENQUEUE_WAKING         0x04    /* sched_class::task_waking was called */
  #else
-#define ENQUEUE_WAKING         0
+#define ENQUEUE_WAKING         0x00
  #endif
-#define ENQUEUE_REPLENISH      8
+#define ENQUEUE_REPLENISH      0x08
+#define ENQUEUE_RESTORE        0x10
  
-#define DEQUEUE_SLEEP          1
+#define DEQUEUE_SLEEP          0x01
+#define DEQUEUE_SAVE           0x02
  
  #define RETRY_TASK             ((void *)-1UL)
  
@@ -1184,9 +1194,8 @@ struct sched_class {
  
  #ifdef CONFIG_SMP
         int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
-       void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
+       void (*migrate_task_rq)(struct task_struct *p);
  
-       void (*post_schedule) (struct rq *this_rq);
         void (*task_waking) (struct task_struct *task);
         void (*task_woken) (struct rq *this_rq, struct task_struct *task);
  
@@ -1218,7 +1227,7 @@ struct sched_class {
         void (*update_curr) (struct rq *rq);
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
-       void (*task_move_group) (struct task_struct *p, int on_rq);
+       void (*task_move_group) (struct task_struct *p);
  #endif
  };
  
@@ -1247,6 +1256,8 @@ extern void trigger_load_balance(struct rq *rq);
  extern void idle_enter_fair(struct rq *this_rq);
  extern void idle_exit_fair(struct rq *this_rq);
  
+extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
+
  #else
  
  static inline void idle_enter_fair(struct rq *rq) { }
@@ -1285,7 +1296,6 @@ extern void update_max_interval(void);
  extern void init_sched_dl_class(void);
  extern void init_sched_rt_class(void);
  extern void init_sched_fair_class(void);
-extern void init_sched_dl_class(void);
  
  extern void resched_curr(struct rq *rq);
  extern void resched_cpu(int cpu);
@@ -1308,9 +1318,7 @@ extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
  
  unsigned long to_ratio(u64 period, u64 runtime);
  
-extern void update_idle_cpu_load(struct rq *this_rq);
-
-extern void init_task_runnable_average(struct task_struct *p);
+extern void init_entity_runnable_average(struct sched_entity *se);
  
  static inline void add_nr_running(struct rq *rq, unsigned count)
  {
@@ -1406,6 +1414,17 @@ unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
  }
  #endif
  
+#ifndef arch_scale_cpu_capacity
+static __always_inline
+unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+       if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
+               return sd->smt_gain / sd->span_weight;
+
+       return SCHED_CAPACITY_SCALE;
+}
+#endif
+
  static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
  {
         rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
@@ -1416,8 +1435,6 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
  static inline void sched_avg_update(struct rq *rq) { }
  #endif
  
-extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
-
  /*
   * __task_rq_lock - lock the rq @p resides on.
   */
@@ -1431,8 +1448,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
         for (;;) {
                 rq = task_rq(p);
                 raw_spin_lock(&rq->lock);
-               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+                       lockdep_pin_lock(&rq->lock);
                         return rq;
+               }
                 raw_spin_unlock(&rq->lock);
  
                 while (unlikely(task_on_rq_migrating(p)))
@@ -1469,8 +1488,10 @@ static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flag
                  * If we observe the new cpu in task_rq_lock, the acquire will
                  * pair with the WMB to ensure we must then also see migrating.
                  */
-               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+                       lockdep_pin_lock(&rq->lock);
                         return rq;
+               }
                 raw_spin_unlock(&rq->lock);
                 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
  
@@ -1482,6 +1503,7 @@ static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flag
  static inline void __task_rq_unlock(struct rq *rq)
         __releases(rq->lock)
  {
+       lockdep_unpin_lock(&rq->lock);
         raw_spin_unlock(&rq->lock);
  }
  
@@ -1490,6 +1512,7 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
         __releases(rq->lock)
         __releases(p->pi_lock)
  {
+       lockdep_unpin_lock(&rq->lock);
         raw_spin_unlock(&rq->lock);
         raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
  }
@@ -1676,9 +1699,22 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
  
  extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
  extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
+
+#ifdef CONFIG_SCHED_DEBUG
  extern void print_cfs_stats(struct seq_file *m, int cpu);
  extern void print_rt_stats(struct seq_file *m, int cpu);
  extern void print_dl_stats(struct seq_file *m, int cpu);
+extern void
+print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+
+#ifdef CONFIG_NUMA_BALANCING
+extern void
+show_numa_stats(struct task_struct *p, struct seq_file *m);
+extern void
+print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
+       unsigned long tpf, unsigned long gsf, unsigned long gpf);
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
  
  extern void init_cfs_rq(struct cfs_rq *cfs_rq);
  extern void init_rt_rq(struct rt_rq *rt_rq);