These changes are the raw update of the kernel sources to linux-4.4.6-rt14.
[kvmfornfv.git] kernel/kernel/rcu/tree_plugin.h
index 54da8f4..8e119cf 100644
 
 #include "../locking/rtmutex_common.h"
 
-#endif /* #ifdef CONFIG_RCU_BOOST */
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+/*
+ * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
+ * all uses are in dead code.  Provide a definition to keep the compiler
+ * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
+ * This probably needs to be excluded from -rt builds.
+ */
+#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
+
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
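Editorial aside (not part of the patch): the rt_mutex_owner() stub above relies on a GCC statement expression, ({ statements; value; }), which executes its statements and then yields the final expression as the value of the whole construct, so any dead-code caller still receives NULL while WARN_ON_ONCE() complains if the stub is ever reached. A minimal stand-alone illustration in plain GNU C, using a hypothetical bogus_owner() macro and fprintf() in place of the kernel's WARN machinery:

#include <stdio.h>

/* Same shape as the stub: emit a complaint, then yield NULL to the caller. */
#define bogus_owner() \
	({ fprintf(stderr, "bogus_owner() should never run\n"); (void *)0; })

int main(void)
{
	void *owner = bogus_owner();	/* complaint printed, owner == NULL */

	printf("owner = %p\n", owner);
	return 0;
}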
 
 /*
  * Control variables for per-CPU and per-rcu_node kthreads.  These
@@ -53,11 +63,11 @@ static void __init rcu_bootup_announce_oddness(void)
 {
        if (IS_ENABLED(CONFIG_RCU_TRACE))
                pr_info("\tRCU debugfs-based tracing is enabled.\n");
-       if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) ||
-           (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32))
+       if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
+           (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
                pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
-                      CONFIG_RCU_FANOUT);
-       if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT))
+                      RCU_FANOUT);
+       if (rcu_fanout_exact)
                pr_info("\tHierarchical RCU autobalancing is disabled.\n");
        if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
                pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
@@ -65,14 +75,12 @@ static void __init rcu_bootup_announce_oddness(void)
                pr_info("\tRCU lockdep checking is enabled.\n");
        if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_RUNNABLE))
                pr_info("\tRCU torture testing starts during boot.\n");
-       if (IS_ENABLED(CONFIG_RCU_CPU_STALL_INFO))
-               pr_info("\tAdditional per-CPU info printed with stalls.\n");
-       if (NUM_RCU_LVL_4 != 0)
-               pr_info("\tFour-level hierarchy is enabled.\n");
-       if (CONFIG_RCU_FANOUT_LEAF != 16)
+       if (RCU_NUM_LVLS >= 4)
+               pr_info("\tFour(or more)-level hierarchy is enabled.\n");
+       if (RCU_FANOUT_LEAF != 16)
                pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
-                       CONFIG_RCU_FANOUT_LEAF);
-       if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
+                       RCU_FANOUT_LEAF);
+       if (rcu_fanout_leaf != RCU_FANOUT_LEAF)
                pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
        if (nr_cpu_ids != NR_CPUS)
                pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
@@ -83,9 +91,9 @@ static void __init rcu_bootup_announce_oddness(void)
 #ifdef CONFIG_PREEMPT_RCU
 
 RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
-static struct rcu_state *rcu_state_p = &rcu_preempt_state;
+static struct rcu_state *const rcu_state_p = &rcu_preempt_state;
+static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
 
-static int rcu_preempted_readers_exp(struct rcu_node *rnp);
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
                               bool wake);
 
@@ -98,6 +106,147 @@ static void __init rcu_bootup_announce(void)
        rcu_bootup_announce_oddness();
 }
 
+/* Flags for rcu_preempt_ctxt_queue() decision table. */
+#define RCU_GP_TASKS   0x8
+#define RCU_EXP_TASKS  0x4
+#define RCU_GP_BLKD    0x2
+#define RCU_EXP_BLKD   0x1
+
+/*
+ * Queues a task preempted within an RCU-preempt read-side critical
+ * section into the appropriate location within the ->blkd_tasks list,
+ * depending on the states of any ongoing normal and expedited grace
+ * periods.  The ->gp_tasks pointer indicates which element the normal
+ * grace period is waiting on (NULL if none), and the ->exp_tasks pointer
+ * indicates which element the expedited grace period is waiting on (again,
+ * NULL if none).  If a grace period is waiting on a given element in the
+ * ->blkd_tasks list, it also waits on all subsequent elements.  Thus,
+ * adding a task to the tail of the list blocks any grace period that is
+ * already waiting on one of the elements.  In contrast, adding a task
+ * to the head of the list won't block any grace period that is already
+ * waiting on one of the elements.
+ *
+ * This queuing is imprecise, and can sometimes make an ongoing grace
+ * period wait for a task that is not strictly speaking blocking it.
+ * Given the choice, we needlessly block a normal grace period rather than
+ * blocking an expedited grace period.
+ *
+ * Note that an endless sequence of expedited grace periods still cannot
+ * indefinitely postpone a normal grace period.  Eventually, all of the
+ * fixed number of preempted tasks blocking the normal grace period that are
+ * not also blocking the expedited grace period will resume and complete
+ * their RCU read-side critical sections.  At that point, the ->gp_tasks
+ * pointer will equal the ->exp_tasks pointer, at which point the end of
+ * the corresponding expedited grace period will also be the end of the
+ * normal grace period.
+ */
+static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp,
+                                  unsigned long flags) __releases(rnp->lock)
+{
+       int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) +
+                        (rnp->exp_tasks ? RCU_EXP_TASKS : 0) +
+                        (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) +
+                        (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
+       struct task_struct *t = current;
+
+       /*
+        * Decide where to queue the newly blocked task.  In theory,
+        * this could be an if-statement.  In practice, when I tried
+        * that, it was quite messy.
+        */
+       switch (blkd_state) {
+       case 0:
+       case                RCU_EXP_TASKS:
+       case                RCU_EXP_TASKS + RCU_GP_BLKD:
+       case RCU_GP_TASKS:
+       case RCU_GP_TASKS + RCU_EXP_TASKS:
+
+               /*
+                * Blocking neither GP, or first task blocking the normal
+                * GP but not blocking the already-waiting expedited GP.
+                * Queue at the head of the list to avoid unnecessarily
+                * blocking the already-waiting GPs.
+                */
+               list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
+               break;
+
+       case                                              RCU_EXP_BLKD:
+       case                                RCU_GP_BLKD:
+       case                                RCU_GP_BLKD + RCU_EXP_BLKD:
+       case RCU_GP_TASKS +                               RCU_EXP_BLKD:
+       case RCU_GP_TASKS +                 RCU_GP_BLKD + RCU_EXP_BLKD:
+       case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
+
+               /*
+                * First task arriving that blocks either GP, or first task
+                * arriving that blocks the expedited GP (with the normal
+                * GP already waiting), or a task arriving that blocks
+                * both GPs with both GPs already waiting.  Queue at the
+                * tail of the list to avoid any GP waiting on any of the
+                * already queued tasks that are not blocking it.
+                */
+               list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks);
+               break;
+
+       case                RCU_EXP_TASKS +               RCU_EXP_BLKD:
+       case                RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
+       case RCU_GP_TASKS + RCU_EXP_TASKS +               RCU_EXP_BLKD:
+
+               /*
+                * Second or subsequent task blocking the expedited GP.
+                * The task either does not block the normal GP, or is the
+                * first task blocking the normal GP.  Queue just after
+                * the first task blocking the expedited GP.
+                */
+               list_add(&t->rcu_node_entry, rnp->exp_tasks);
+               break;
+
+       case RCU_GP_TASKS +                 RCU_GP_BLKD:
+       case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD:
+
+               /*
+                * Second or subsequent task blocking the normal GP.
+                * The task does not block the expedited GP. Queue just
+                * after the first task blocking the normal GP.
+                */
+               list_add(&t->rcu_node_entry, rnp->gp_tasks);
+               break;
+
+       default:
+
+               /* Yet another exercise in excessive paranoia. */
+               WARN_ON_ONCE(1);
+               break;
+       }
+
+       /*
+        * We have now queued the task.  If it was the first one to
+        * block either grace period, update the ->gp_tasks and/or
+        * ->exp_tasks pointers, respectively, to reference the newly
+        * blocked tasks.
+        */
+       if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD))
+               rnp->gp_tasks = &t->rcu_node_entry;
+       if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
+               rnp->exp_tasks = &t->rcu_node_entry;
+       raw_spin_unlock(&rnp->lock);
+
+       /*
+        * Report the quiescent state for the expedited GP.  This expedited
+        * GP should not be able to end until we report, so there should be
+        * no need to check for a subsequent expedited GP.  (Though we are
+        * still in a quiescent state in any case.)
+        */
+       if (blkd_state & RCU_EXP_BLKD &&
+           t->rcu_read_unlock_special.b.exp_need_qs) {
+               t->rcu_read_unlock_special.b.exp_need_qs = false;
+               rcu_report_exp_rdp(rdp->rsp, rdp, true);
+       } else {
+               WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs);
+       }
+       local_irq_restore(flags);
+}
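Editorial aside (illustrative only, not from the kernel tree): rcu_preempt_ctxt_queue() above packs four booleans into blkd_state, and the switch maps each of the sixteen possible values onto one of four queue positions; because all sixteen states are enumerated, the default arm is pure paranoia. The stand-alone C program below mirrors that switch and prints the placement chosen for every state:

#include <stdio.h>

#define RCU_GP_TASKS	0x8	/* ->gp_tasks non-NULL: normal GP waiting on the list */
#define RCU_EXP_TASKS	0x4	/* ->exp_tasks non-NULL: expedited GP waiting on the list */
#define RCU_GP_BLKD	0x2	/* this task blocks the current normal GP */
#define RCU_EXP_BLKD	0x1	/* this task blocks the current expedited GP */

/* Mirror of the switch in rcu_preempt_ctxt_queue(). */
static const char *queue_position(int blkd_state)
{
	switch (blkd_state) {
	case 0:
	case RCU_EXP_TASKS:
	case RCU_EXP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS:
	case RCU_GP_TASKS + RCU_EXP_TASKS:
		return "head of ->blkd_tasks";
	case RCU_EXP_BLKD:
	case RCU_GP_BLKD:
	case RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
		return "tail of ->blkd_tasks";
	case RCU_EXP_TASKS + RCU_EXP_BLKD:
	case RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_EXP_BLKD:
		return "just after ->exp_tasks";
	case RCU_GP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD:
		return "just after ->gp_tasks";
	default:
		return "unreachable";	/* the kernel WARNs here */
	}
}

int main(void)
{
	for (int state = 0; state <= 0xf; state++)
		printf("blkd_state %#x -> %s\n", state, queue_position(state));
	return 0;
}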
+
 /*
  * Record a preemptible-RCU quiescent state for the specified CPU.  Note
  * that this just means that the task currently running on the CPU is
@@ -109,11 +258,11 @@ static void __init rcu_bootup_announce(void)
  */
 static void rcu_preempt_qs(void)
 {
-       if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) {
+       if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
                trace_rcu_grace_period(TPS("rcu_preempt"),
-                                      __this_cpu_read(rcu_preempt_data.gpnum),
+                                      __this_cpu_read(rcu_data_p->gpnum),
                                       TPS("cpuqs"));
-               __this_cpu_write(rcu_preempt_data.passed_quiesce, 1);
+               __this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false);
                barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
                current->rcu_read_unlock_special.b.need_qs = false;
        }
@@ -143,7 +292,7 @@ static void rcu_preempt_note_context_switch(void)
            !t->rcu_read_unlock_special.b.blocked) {
 
                /* Possibly blocking in an RCU read-side critical section. */
-               rdp = this_cpu_ptr(rcu_preempt_state.rda);
+               rdp = this_cpu_ptr(rcu_state_p->rda);
                rnp = rdp->mynode;
                raw_spin_lock_irqsave(&rnp->lock, flags);
                smp_mb__after_unlock_lock();
@@ -151,43 +300,18 @@ static void rcu_preempt_note_context_switch(void)
                t->rcu_blocked_node = rnp;
 
                /*
-                * If this CPU has already checked in, then this task
-                * will hold up the next grace period rather than the
-                * current grace period.  Queue the task accordingly.
-                * If the task is queued for the current grace period
-                * (i.e., this CPU has not yet passed through a quiescent
-                * state for the current grace period), then as long
-                * as that task remains queued, the current grace period
-                * cannot end.  Note that there is some uncertainty as
-                * to exactly when the current grace period started.
-                * We take a conservative approach, which can result
-                * in unnecessarily waiting on tasks that started very
-                * slightly after the current grace period began.  C'est
-                * la vie!!!
-                *
-                * But first, note that the current CPU must still be
-                * on line!
+                * Verify the CPU's sanity, trace the preemption, and
+                * then queue the task as required based on the states
+                * of any ongoing and expedited grace periods.
                 */
                WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
                WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
-               if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
-                       list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
-                       rnp->gp_tasks = &t->rcu_node_entry;
-#ifdef CONFIG_RCU_BOOST
-                       if (rnp->boost_tasks != NULL)
-                               rnp->boost_tasks = rnp->gp_tasks;
-#endif /* #ifdef CONFIG_RCU_BOOST */
-               } else {
-                       list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
-                       if (rnp->qsmask & rdp->grpmask)
-                               rnp->gp_tasks = &t->rcu_node_entry;
-               }
                trace_rcu_preempt_task(rdp->rsp->name,
                                       t->pid,
                                       (rnp->qsmask & rdp->grpmask)
                                       ? rnp->gpnum
                                       : rnp->gpnum + 1);
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               rcu_preempt_ctxt_queue(rnp, rdp, flags);
        } else if (t->rcu_read_lock_nesting < 0 &&
                   t->rcu_read_unlock_special.s) {
 
@@ -256,9 +380,8 @@ void rcu_read_unlock_special(struct task_struct *t)
        bool empty_exp_now;
        unsigned long flags;
        struct list_head *np;
-#ifdef CONFIG_RCU_BOOST
        bool drop_boost_mutex = false;
-#endif /* #ifdef CONFIG_RCU_BOOST */
+       struct rcu_data *rdp;
        struct rcu_node *rnp;
        union rcu_special special;
 
@@ -269,8 +392,8 @@ void rcu_read_unlock_special(struct task_struct *t)
        local_irq_save(flags);
 
        /*
-        * If RCU core is waiting for this CPU to exit critical section,
-        * let it know that we have done so.  Because irqs are disabled,
+        * If RCU core is waiting for this CPU to exit its critical section,
+        * report the fact that it has exited.  Because irqs are disabled,
         * t->rcu_read_unlock_special cannot change.
         */
        special = t->rcu_read_unlock_special;
@@ -283,13 +406,32 @@ void rcu_read_unlock_special(struct task_struct *t)
                }
        }
 
+       /*
+        * Respond to a request for an expedited grace period, but only if
+        * we were not preempted, meaning that we were running on the same
+        * CPU throughout.  If we were preempted, the exp_need_qs flag
+        * would have been cleared at the time of the first preemption,
+        * and the quiescent state would be reported when we were dequeued.
+        */
+       if (special.b.exp_need_qs) {
+               WARN_ON_ONCE(special.b.blocked);
+               t->rcu_read_unlock_special.b.exp_need_qs = false;
+               rdp = this_cpu_ptr(rcu_state_p->rda);
+               rcu_report_exp_rdp(rcu_state_p, rdp, true);
+               if (!t->rcu_read_unlock_special.s) {
+                       local_irq_restore(flags);
+                       return;
+               }
+       }
+
        /* Hardware IRQ handlers cannot block, complain if they get here. */
        if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
                lockdep_rcu_suspicious(__FILE__, __LINE__,
                                       "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
-               pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n",
+               pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
                         t->rcu_read_unlock_special.s,
                         t->rcu_read_unlock_special.b.blocked,
+                        t->rcu_read_unlock_special.b.exp_need_qs,
                         t->rcu_read_unlock_special.b.need_qs);
                local_irq_restore(flags);
                return;
@@ -300,9 +442,11 @@ void rcu_read_unlock_special(struct task_struct *t)
                t->rcu_read_unlock_special.b.blocked = false;
 
                /*
-                * Remove this task from the list it blocked on.  The
-                * task can migrate while we acquire the lock, but at
-                * most one time.  So at most two passes through loop.
+                * Remove this task from the list it blocked on.  The task
+                * now remains queued on the rcu_node corresponding to
+                * the CPU it first blocked on, so the first attempt to
+                * acquire the task's rcu_node's ->lock will succeed.
+                * Keep the loop and add a WARN_ON() out of sheer paranoia.
                 */
                for (;;) {
                        rnp = t->rcu_blocked_node;
@@ -310,10 +454,11 @@ void rcu_read_unlock_special(struct task_struct *t)
                        smp_mb__after_unlock_lock();
                        if (rnp == t->rcu_blocked_node)
                                break;
+                       WARN_ON_ONCE(1);
                        raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
                }
                empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
-               empty_exp = !rcu_preempted_readers_exp(rnp);
+               empty_exp = sync_rcu_preempt_exp_done(rnp);
                smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
                np = rcu_next_node_entry(t, rnp);
                list_del_init(&t->rcu_node_entry);
@@ -324,12 +469,12 @@ void rcu_read_unlock_special(struct task_struct *t)
                        rnp->gp_tasks = np;
                if (&t->rcu_node_entry == rnp->exp_tasks)
                        rnp->exp_tasks = np;
-#ifdef CONFIG_RCU_BOOST
-               if (&t->rcu_node_entry == rnp->boost_tasks)
-                       rnp->boost_tasks = np;
-               /* Snapshot ->boost_mtx ownership with rcu_node lock held. */
-               drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
-#endif /* #ifdef CONFIG_RCU_BOOST */
+               if (IS_ENABLED(CONFIG_RCU_BOOST)) {
+                       if (&t->rcu_node_entry == rnp->boost_tasks)
+                               rnp->boost_tasks = np;
+                       /* Snapshot ->boost_mtx ownership w/rnp->lock held. */
+                       drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
+               }
 
                /*
                 * If this was the last task on the current list, and if
@@ -337,7 +482,7 @@ void rcu_read_unlock_special(struct task_struct *t)
                 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
                 * so we must take a snapshot of the expedited state.
                 */
-               empty_exp_now = !rcu_preempted_readers_exp(rnp);
+               empty_exp_now = sync_rcu_preempt_exp_done(rnp);
                if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
                        trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
                                                         rnp->gpnum,
@@ -346,24 +491,21 @@ void rcu_read_unlock_special(struct task_struct *t)
                                                         rnp->grplo,
                                                         rnp->grphi,
                                                         !!rnp->gp_tasks);
-                       rcu_report_unblock_qs_rnp(&rcu_preempt_state,
-                                                 rnp, flags);
+                       rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags);
                } else {
                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
                }
 
-#ifdef CONFIG_RCU_BOOST
                /* Unboost if we were boosted. */
-               if (drop_boost_mutex)
+               if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
                        rt_mutex_unlock(&rnp->boost_mtx);
-#endif /* #ifdef CONFIG_RCU_BOOST */
 
                /*
                 * If this was the last task on the expedited lists,
                 * then we need to report up the rcu_node hierarchy.
                 */
                if (!empty_exp && empty_exp_now)
-                       rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
+                       rcu_report_exp_rnp(rcu_state_p, rnp, true);
        } else {
                local_irq_restore(flags);
        }
@@ -383,7 +525,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
                return;
        }
-       t = list_entry(rnp->gp_tasks,
+       t = list_entry(rnp->gp_tasks->prev,
                       struct task_struct, rcu_node_entry);
        list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
                sched_show_task(t);
@@ -403,8 +545,6 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
                rcu_print_detail_task_stall_rnp(rnp);
 }
 
-#ifdef CONFIG_RCU_CPU_STALL_INFO
-
 static void rcu_print_task_stall_begin(struct rcu_node *rnp)
 {
        pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
@@ -416,18 +556,6 @@ static void rcu_print_task_stall_end(void)
        pr_cont("\n");
 }
 
-#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
-
-static void rcu_print_task_stall_begin(struct rcu_node *rnp)
-{
-}
-
-static void rcu_print_task_stall_end(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
-
 /*
  * Scan the current list of tasks blocked within RCU read-side critical
  * sections, printing out the tid of each.
@@ -440,7 +568,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
        if (!rcu_preempt_blocked_readers_cgp(rnp))
                return 0;
        rcu_print_task_stall_begin(rnp);
-       t = list_entry(rnp->gp_tasks,
+       t = list_entry(rnp->gp_tasks->prev,
                       struct task_struct, rcu_node_entry);
        list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
                pr_cont(" P%d", t->pid);
@@ -450,6 +578,27 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
        return ndetected;
 }
 
+/*
+ * Scan the current list of tasks blocked within RCU read-side critical
+ * sections, printing out the tid of each that is blocking the current
+ * expedited grace period.
+ */
+static int rcu_print_task_exp_stall(struct rcu_node *rnp)
+{
+       struct task_struct *t;
+       int ndetected = 0;
+
+       if (!rnp->exp_tasks)
+               return 0;
+       t = list_entry(rnp->exp_tasks->prev,
+                      struct task_struct, rcu_node_entry);
+       list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+               pr_cont(" P%d", t->pid);
+               ndetected++;
+       }
+       return ndetected;
+}
+
 /*
  * Check that the list of blocked tasks for the newly completed grace
  * period is in fact empty.  It is a serious bug to complete a grace
@@ -484,17 +633,17 @@ static void rcu_preempt_check_callbacks(void)
                return;
        }
        if (t->rcu_read_lock_nesting > 0 &&
-           __this_cpu_read(rcu_preempt_data.qs_pending) &&
-           !__this_cpu_read(rcu_preempt_data.passed_quiesce))
+           __this_cpu_read(rcu_data_p->core_needs_qs) &&
+           __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm))
                t->rcu_read_unlock_special.b.need_qs = true;
 }
 
 /*
  * Queue a preemptible-RCU callback for invocation after a grace period.
  */
-void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
-       __call_rcu(head, func, &rcu_preempt_state, -1, 0);
+       __call_rcu(head, func, rcu_state_p, -1, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
@@ -514,10 +663,10 @@ EXPORT_SYMBOL_GPL(call_rcu);
  */
 void synchronize_rcu(void)
 {
-       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
-                          !lock_is_held(&rcu_lock_map) &&
-                          !lock_is_held(&rcu_sched_lock_map),
-                          "Illegal synchronize_rcu() in RCU read-side critical section");
+       RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
+                        lock_is_held(&rcu_lock_map) ||
+                        lock_is_held(&rcu_sched_lock_map),
+                        "Illegal synchronize_rcu() in RCU read-side critical section");
        if (!rcu_scheduler_active)
                return;
        if (rcu_gp_is_expedited())
@@ -527,157 +676,41 @@ void synchronize_rcu(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
-static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
-static unsigned long sync_rcu_preempt_exp_count;
-static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
-
-/*
- * Return non-zero if there are any tasks in RCU read-side critical
- * sections blocking the current preemptible-RCU expedited grace period.
- * If there is no preemptible-RCU expedited grace period currently in
- * progress, returns zero unconditionally.
- */
-static int rcu_preempted_readers_exp(struct rcu_node *rnp)
-{
-       return rnp->exp_tasks != NULL;
-}
-
-/*
- * return non-zero if there is no RCU expedited grace period in progress
- * for the specified rcu_node structure, in other words, if all CPUs and
- * tasks covered by the specified rcu_node structure have done their bit
- * for the current expedited grace period.  Works only for preemptible
- * RCU -- other RCU implementation use other means.
- *
- * Caller must hold sync_rcu_preempt_exp_mutex.
- */
-static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
-{
-       return !rcu_preempted_readers_exp(rnp) &&
-              ACCESS_ONCE(rnp->expmask) == 0;
-}
-
-/*
- * Report the exit from RCU read-side critical section for the last task
- * that queued itself during or before the current expedited preemptible-RCU
- * grace period.  This event is reported either to the rcu_node structure on
- * which the task was queued or to one of that rcu_node structure's ancestors,
- * recursively up the tree.  (Calm down, calm down, we do the recursion
- * iteratively!)
- *
- * Caller must hold sync_rcu_preempt_exp_mutex.
- */
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
-                              bool wake)
-{
-       unsigned long flags;
-       unsigned long mask;
-
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       smp_mb__after_unlock_lock();
-       for (;;) {
-               if (!sync_rcu_preempt_exp_done(rnp)) {
-                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                       break;
-               }
-               if (rnp->parent == NULL) {
-                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                       if (wake) {
-                               smp_mb(); /* EGP done before wake_up(). */
-                               wake_up(&sync_rcu_preempt_exp_wq);
-                       }
-                       break;
-               }
-               mask = rnp->grpmask;
-               raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
-               rnp = rnp->parent;
-               raw_spin_lock(&rnp->lock); /* irqs already disabled */
-               smp_mb__after_unlock_lock();
-               rnp->expmask &= ~mask;
-       }
-}
-
-/*
- * Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure, phase 1.  If there
- * are such tasks, set the ->expmask bits up the rcu_node tree and also
- * set the ->expmask bits on the leaf rcu_node structures to tell phase 2
- * that work is needed here.
- *
- * Caller must hold sync_rcu_preempt_exp_mutex.
- */
-static void
-sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp)
-{
-       unsigned long flags;
-       unsigned long mask;
-       struct rcu_node *rnp_up;
-
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       smp_mb__after_unlock_lock();
-       WARN_ON_ONCE(rnp->expmask);
-       WARN_ON_ONCE(rnp->exp_tasks);
-       if (!rcu_preempt_has_tasks(rnp)) {
-               /* No blocked tasks, nothing to do. */
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               return;
-       }
-       /* Call for Phase 2 and propagate ->expmask bits up the tree. */
-       rnp->expmask = 1;
-       rnp_up = rnp;
-       while (rnp_up->parent) {
-               mask = rnp_up->grpmask;
-               rnp_up = rnp_up->parent;
-               if (rnp_up->expmask & mask)
-                       break;
-               raw_spin_lock(&rnp_up->lock); /* irqs already off */
-               smp_mb__after_unlock_lock();
-               rnp_up->expmask |= mask;
-               raw_spin_unlock(&rnp_up->lock); /* irqs still off */
-       }
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
 /*
- * Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure, phase 2.  If the
- * leaf rcu_node structure has its ->expmask field set, check for tasks.
- * If there are some, clear ->expmask and set ->exp_tasks accordingly,
- * then initiate RCU priority boosting.  Otherwise, clear ->expmask and
- * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits,
- * enabling rcu_read_unlock_special() to do the bit-clearing.
- *
- * Caller must hold sync_rcu_preempt_exp_mutex.
+ * Remote handler for smp_call_function_single().  If there is an
+ * RCU read-side critical section in effect, request that the
+ * next rcu_read_unlock() record the quiescent state up the
+ * ->expmask fields in the rcu_node tree.  Otherwise, immediately
+ * report the quiescent state.
  */
-static void
-sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
+static void sync_rcu_exp_handler(void *info)
 {
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&rnp->lock, flags);
-       smp_mb__after_unlock_lock();
-       if (!rnp->expmask) {
-               /* Phase 1 didn't do anything, so Phase 2 doesn't either. */
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               return;
-       }
-
-       /* Phase 1 is over. */
-       rnp->expmask = 0;
+       struct rcu_data *rdp;
+       struct rcu_state *rsp = info;
+       struct task_struct *t = current;
 
        /*
-        * If there are still blocked tasks, set up ->exp_tasks so that
-        * rcu_read_unlock_special() will wake us and then boost them.
+        * Within an RCU read-side critical section, request that the next
+        * rcu_read_unlock() report.  Unless this RCU read-side critical
+        * section has already blocked, in which case it is already set
+        * up for the expedited grace period to wait on it.
         */
-       if (rcu_preempt_has_tasks(rnp)) {
-               rnp->exp_tasks = rnp->blkd_tasks.next;
-               rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
+       if (t->rcu_read_lock_nesting > 0 &&
+           !t->rcu_read_unlock_special.b.blocked) {
+               t->rcu_read_unlock_special.b.exp_need_qs = true;
                return;
        }
 
-       /* No longer any blocked tasks, so undo bit setting. */
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       rcu_report_exp_rnp(rsp, rnp, false);
+       /*
+        * We are either exiting an RCU read-side critical section (negative
+        * values of t->rcu_read_lock_nesting) or are not in one at all
+        * (zero value of t->rcu_read_lock_nesting).  Or we are in an RCU
+        * read-side critical section that blocked before this expedited
+        * grace period started.  Either way, we can immediately report
+        * the quiescent state.
+        */
+       rdp = this_cpu_ptr(rsp->rda);
+       rcu_report_exp_rdp(rsp, rdp, true);
 }
 
 /**
@@ -695,81 +728,28 @@ sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
 void synchronize_rcu_expedited(void)
 {
        struct rcu_node *rnp;
-       struct rcu_state *rsp = &rcu_preempt_state;
-       unsigned long snap;
-       int trycount = 0;
+       struct rcu_node *rnp_unlock;
+       struct rcu_state *rsp = rcu_state_p;
+       unsigned long s;
 
-       smp_mb(); /* Caller's modifications seen first by other CPUs. */
-       snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
-       smp_mb(); /* Above access cannot bleed into critical section. */
+       s = rcu_exp_gp_seq_snap(rsp);
 
-       /*
-        * Block CPU-hotplug operations.  This means that any CPU-hotplug
-        * operation that finds an rcu_node structure with tasks in the
-        * process of being boosted will know that all tasks blocking
-        * this expedited grace period will already be in the process of
-        * being boosted.  This simplifies the process of moving tasks
-        * from leaf to root rcu_node structures.
-        */
-       if (!try_get_online_cpus()) {
-               /* CPU-hotplug operation in flight, fall back to normal GP. */
-               wait_rcu_gp(call_rcu);
-               return;
-       }
+       rnp_unlock = exp_funnel_lock(rsp, s);
+       if (rnp_unlock == NULL)
+               return;  /* Someone else did our work for us. */
 
-       /*
-        * Acquire lock, falling back to synchronize_rcu() if too many
-        * lock-acquisition failures.  Of course, if someone does the
-        * expedited grace period for us, just leave.
-        */
-       while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
-               if (ULONG_CMP_LT(snap,
-                   ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
-                       put_online_cpus();
-                       goto mb_ret; /* Others did our work for us. */
-               }
-               if (trycount++ < 10) {
-                       udelay(trycount * num_online_cpus());
-               } else {
-                       put_online_cpus();
-                       wait_rcu_gp(call_rcu);
-                       return;
-               }
-       }
-       if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
-               put_online_cpus();
-               goto unlock_mb_ret; /* Others did our work for us. */
-       }
+       rcu_exp_gp_seq_start(rsp);
 
-       /* force all RCU readers onto ->blkd_tasks lists. */
-       synchronize_sched_expedited();
-
-       /*
-        * Snapshot current state of ->blkd_tasks lists into ->expmask.
-        * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special()
-        * to start clearing them.  Doing this in one phase leads to
-        * strange races between setting and clearing bits, so just say "no"!
-        */
-       rcu_for_each_leaf_node(rsp, rnp)
-               sync_rcu_preempt_exp_init1(rsp, rnp);
-       rcu_for_each_leaf_node(rsp, rnp)
-               sync_rcu_preempt_exp_init2(rsp, rnp);
-
-       put_online_cpus();
+       /* Initialize the rcu_node tree in preparation for the wait. */
+       sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler);
 
        /* Wait for snapshotted ->blkd_tasks lists to drain. */
        rnp = rcu_get_root(rsp);
-       wait_event(sync_rcu_preempt_exp_wq,
-                  sync_rcu_preempt_exp_done(rnp));
+       synchronize_sched_expedited_wait(rsp);
 
        /* Clean up and exit. */
-       smp_mb(); /* ensure expedited GP seen before counter increment. */
-       ACCESS_ONCE(sync_rcu_preempt_exp_count) =
-                                       sync_rcu_preempt_exp_count + 1;
-unlock_mb_ret:
-       mutex_unlock(&sync_rcu_preempt_exp_mutex);
-mb_ret:
-       smp_mb(); /* ensure subsequent action seen after grace period. */
+       rcu_exp_gp_seq_end(rsp);
+       mutex_unlock(&rnp_unlock->exp_funnel_mutex);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
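Editorial aside on the rewritten synchronize_rcu_expedited() above: the rcu_exp_gp_seq_snap()/exp_funnel_lock() pairing lets a caller return early when somebody else's expedited grace period already covers it. The sketch below shows only the sequence-counter idea behind that short-circuit, with hypothetical names (exp_seq, exp_seq_snap(), exp_seq_done()) rather than the kernel's actual helpers: the counter is even while idle and odd while a grace period is in flight, and a snapshot records the counter value that proves a full grace period began after the snapshot was taken.

#include <stdio.h>

static unsigned long exp_seq;	/* even: idle, odd: expedited GP in flight */

/* Counter value that proves a full GP started after this call. */
static unsigned long exp_seq_snap(void)
{
	return (exp_seq + 3) & ~0x1UL;
}

/* Has a full expedited GP completed since snapshot s was taken? */
static int exp_seq_done(unsigned long s)
{
	return (long)(exp_seq - s) >= 0;
}

int main(void)
{
	unsigned long s = exp_seq_snap();	/* exp_seq == 0, so s == 2 */

	exp_seq++;	/* some other caller starts an expedited GP */
	exp_seq++;	/* ...and completes it                      */
	printf("piggy-back on that GP? %d\n", exp_seq_done(s));	/* prints 1 */
	return 0;
}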
 
@@ -783,7 +763,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  */
 void rcu_barrier(void)
 {
-       _rcu_barrier(&rcu_preempt_state);
+       _rcu_barrier(rcu_state_p);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
@@ -792,7 +772,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier);
  */
 static void __init __rcu_init_preempt(void)
 {
-       rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
+       rcu_init_one(rcu_state_p, rcu_data_p);
 }
 
 /*
@@ -815,7 +795,8 @@ void exit_rcu(void)
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
-static struct rcu_state *rcu_state_p = &rcu_sched_state;
+static struct rcu_state *const rcu_state_p = &rcu_sched_state;
+static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data;
 
 /*
  * Tell them what RCU they are running.
@@ -868,6 +849,16 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
        return 0;
 }
 
+/*
+ * Because preemptible RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections that are
+ * blocking the current expedited grace period.
+ */
+static int rcu_print_task_exp_stall(struct rcu_node *rnp)
+{
+       return 0;
+}
+
 /*
  * Because there is no preemptible RCU, there can be no readers blocked,
  * so there is no need to check for blocked tasks.  So check only for
@@ -981,8 +972,8 @@ static int rcu_boost(struct rcu_node *rnp)
        struct task_struct *t;
        struct list_head *tb;
 
-       if (ACCESS_ONCE(rnp->exp_tasks) == NULL &&
-           ACCESS_ONCE(rnp->boost_tasks) == NULL)
+       if (READ_ONCE(rnp->exp_tasks) == NULL &&
+           READ_ONCE(rnp->boost_tasks) == NULL)
                return 0;  /* Nothing left to boost. */
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1035,13 +1026,12 @@ static int rcu_boost(struct rcu_node *rnp)
        rt_mutex_lock(&rnp->boost_mtx);
        rt_mutex_unlock(&rnp->boost_mtx);  /* Then keep lockdep happy. */
 
-       return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
-              ACCESS_ONCE(rnp->boost_tasks) != NULL;
+       return READ_ONCE(rnp->exp_tasks) != NULL ||
+              READ_ONCE(rnp->boost_tasks) != NULL;
 }
 
 /*
- * Priority-boosting kthread.  One per leaf rcu_node and one for the
- * root rcu_node.
+ * Priority-boosting kthread, one per leaf rcu_node.
  */
 static int rcu_boost_kthread(void *arg)
 {
@@ -1143,7 +1133,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
        struct sched_param sp;
        struct task_struct *t;
 
-       if (&rcu_preempt_state != rsp)
+       if (rcu_state_p != rsp)
                return 0;
 
        if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
@@ -1257,13 +1247,12 @@ static void rcu_prepare_kthreads(int cpu)
  * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs
  * any flavor of RCU.
  */
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
-int rcu_needs_cpu(unsigned long *delta_jiffies)
+int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
-       *delta_jiffies = ULONG_MAX;
-       return rcu_cpu_has_callbacks(NULL);
+       *nextevt = KTIME_MAX;
+       return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
+              ? 0 : rcu_cpu_has_callbacks(NULL);
 }
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 #endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
 
 #if !defined(CONFIG_RCU_FAST_NO_HZ)
@@ -1324,8 +1313,6 @@ module_param(rcu_idle_gp_delay, int, 0644);
 static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
 module_param(rcu_idle_lazy_gp_delay, int, 0644);
 
-extern int tick_nohz_active;
-
 /*
  * Try to advance callbacks for all flavors of RCU on the current CPU, but
  * only if it has been awhile since the last time we did so.  Afterwards,
@@ -1354,7 +1341,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
                 * callbacks not yet ready to invoke.
                 */
                if ((rdp->completed != rnp->completed ||
-                    unlikely(ACCESS_ONCE(rdp->gpwrap))) &&
+                    unlikely(READ_ONCE(rdp->gpwrap))) &&
                    rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
                        note_gp_changes(rsp, rdp);
 
@@ -1374,17 +1361,22 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
  *
  * The caller must have disabled interrupts.
  */
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
-int rcu_needs_cpu(unsigned long *dj)
+int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
        struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+       unsigned long dj;
+
+       if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) {
+               *nextevt = KTIME_MAX;
+               return 0;
+       }
 
        /* Snapshot to detect later posting of non-lazy callback. */
        rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
 
        /* If no callbacks, RCU doesn't need the CPU. */
        if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) {
-               *dj = ULONG_MAX;
+               *nextevt = KTIME_MAX;
                return 0;
        }
 
@@ -1398,15 +1390,16 @@ int rcu_needs_cpu(unsigned long *dj)
 
        /* Request timer delay depending on laziness, and round. */
        if (!rdtp->all_lazy) {
-               *dj = round_up(rcu_idle_gp_delay + jiffies,
+               dj = round_up(rcu_idle_gp_delay + jiffies,
                               rcu_idle_gp_delay) - jiffies;
        } else {
-               *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
+               dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
        }
+       *nextevt = basemono + dj * TICK_NSEC;
        return 0;
 }
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 #endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
+
 /*
  * Prepare a CPU for idle from an RCU perspective.  The first major task
  * is to sense whether nohz mode has been enabled or disabled via sysfs.
@@ -1419,7 +1412,6 @@ int rcu_needs_cpu(unsigned long *dj)
  */
 static void rcu_prepare_for_idle(void)
 {
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
        bool needwake;
        struct rcu_data *rdp;
        struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
@@ -1427,8 +1419,11 @@ static void rcu_prepare_for_idle(void)
        struct rcu_state *rsp;
        int tne;
 
+       if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL))
+               return;
+
        /* Handle nohz enablement switches conservatively. */
-       tne = ACCESS_ONCE(tick_nohz_active);
+       tne = READ_ONCE(tick_nohz_active);
        if (tne != rdtp->tick_nohz_enabled_snap) {
                if (rcu_cpu_has_callbacks(NULL))
                        invoke_rcu_core(); /* force nohz to see update. */
@@ -1474,7 +1469,6 @@ static void rcu_prepare_for_idle(void)
                if (needwake)
                        rcu_gp_kthread_wake(rsp);
        }
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 }
 
 /*
@@ -1484,12 +1478,11 @@ static void rcu_prepare_for_idle(void)
  */
 static void rcu_cleanup_after_idle(void)
 {
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
-       if (rcu_is_nocb_cpu(smp_processor_id()))
+       if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
+           rcu_is_nocb_cpu(smp_processor_id()))
                return;
        if (rcu_try_advance_all_cbs())
                invoke_rcu_core();
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 }
 
 /*
@@ -1564,12 +1557,10 @@ static int rcu_oom_notify(struct notifier_block *self,
         */
        atomic_set(&oom_callback_count, 1);
 
-       get_online_cpus();
        for_each_online_cpu(cpu) {
                smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
                cond_resched_rcu_qs();
        }
-       put_online_cpus();
 
        /* Unconditionally decrement: no need to wake ourselves up. */
        atomic_dec(&oom_callback_count);
@@ -1590,8 +1581,6 @@ early_initcall(rcu_register_oom_notifier);
 
 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 
-#ifdef CONFIG_RCU_CPU_STALL_INFO
-
 #ifdef CONFIG_RCU_FAST_NO_HZ
 
 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
@@ -1649,12 +1638,16 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
                ticks_value = rsp->gpnum - rdp->gpnum;
        }
        print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
-       pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
-              cpu, ticks_value, ticks_title,
+       pr_err("\t%d-%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
+              cpu,
+              "O."[!!cpu_online(cpu)],
+              "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
+              "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
+              ticks_value, ticks_title,
               atomic_read(&rdtp->dynticks) & 0xfff,
               rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
               rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
-              ACCESS_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
+              READ_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
               fast_no_hz);
 }
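Editorial aside on the reworked stall line above: the "O."[!!cpu_online(cpu)] expressions index a two-character string literal with a 0/1 value, so a letter marks the unusual condition and '.' means all is well. A trivial stand-alone illustration, not kernel code:

#include <stdio.h>

int main(void)
{
	int online = 0;

	/* Index 0 of "O." is 'O', index 1 is '.', so the letter flags the
	 * odd case (CPU offline here) and '.' the normal one. */
	printf("%c\n", "O."[!!online]);		/* prints 'O' */
	online = 1;
	printf("%c\n", "O."[!!online]);		/* prints '.' */
	return 0;
}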
 
@@ -1680,33 +1673,6 @@ static void increment_cpu_stall_ticks(void)
                raw_cpu_inc(rsp->rda->ticks_this_gp);
 }
 
-#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
-
-static void print_cpu_stall_info_begin(void)
-{
-       pr_cont(" {");
-}
-
-static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
-{
-       pr_cont(" %d", cpu);
-}
-
-static void print_cpu_stall_info_end(void)
-{
-       pr_cont("} ");
-}
-
-static void zero_cpu_stall_ticks(struct rcu_data *rdp)
-{
-}
-
-static void increment_cpu_stall_ticks(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
-
 #ifdef CONFIG_RCU_NOCB_CPU
 
 /*
@@ -1751,9 +1717,9 @@ early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
  * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
  * grace period.
  */
-static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
 {
-       swait_wake_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
+       swake_up_all(sq);
 }
 
 /*
@@ -1769,10 +1735,15 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
        rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
 }
 
+static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
+{
+       return &rnp->nocb_gp_wq[rnp->completed & 0x1];
+}
+
 static void rcu_init_one_nocb(struct rcu_node *rnp)
 {
-       init_swait_head(&rnp->nocb_gp_wq[0]);
-       init_swait_head(&rnp->nocb_gp_wq[1]);
+       init_swait_queue_head(&rnp->nocb_gp_wq[0]);
+       init_swait_queue_head(&rnp->nocb_gp_wq[1]);
 }
 
 #ifndef CONFIG_RCU_NOCB_CPU_ALL
@@ -1792,12 +1763,12 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
 {
        struct rcu_data *rdp_leader = rdp->nocb_leader;
 
-       if (!ACCESS_ONCE(rdp_leader->nocb_kthread))
+       if (!READ_ONCE(rdp_leader->nocb_kthread))
                return;
-       if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) {
+       if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
                /* Prior smp_mb__after_atomic() orders against prior enqueue. */
-               ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false;
-               swait_wake(&rdp_leader->nocb_wq);
+               WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
+               swake_up(&rdp_leader->nocb_wq);
        }
 }
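Editorial aside on the ACCESS_ONCE() conversions that recur throughout this file: plain reads become READ_ONCE(x) and assignments become WRITE_ONCE(x, val), as in wake_nocb_leader() above. The snippet below is a simplified user-space model of that pattern only; the kernel's real accessors also handle non-scalar sizes, so treat these macro definitions as an illustrative assumption:

#include <stdio.h>

/* Simplified stand-ins for the kernel accessors (scalar types only). */
#define READ_ONCE(x)		(*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, val)	(*(volatile __typeof__(x) *)&(x) = (val))

static int nocb_leader_sleep = 1;

int main(void)
{
	int asleep = READ_ONCE(nocb_leader_sleep);	/* was: r = ACCESS_ONCE(x) */

	if (asleep)
		WRITE_ONCE(nocb_leader_sleep, 0);	/* was: ACCESS_ONCE(x) = 0 */
	printf("asleep was %d, now %d\n", asleep, nocb_leader_sleep);
	return 0;
}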
 
@@ -1828,14 +1799,14 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
        ret = atomic_long_read(&rdp->nocb_q_count);
 
 #ifdef CONFIG_PROVE_RCU
-       rhp = ACCESS_ONCE(rdp->nocb_head);
+       rhp = READ_ONCE(rdp->nocb_head);
        if (!rhp)
-               rhp = ACCESS_ONCE(rdp->nocb_gp_head);
+               rhp = READ_ONCE(rdp->nocb_gp_head);
        if (!rhp)
-               rhp = ACCESS_ONCE(rdp->nocb_follower_head);
+               rhp = READ_ONCE(rdp->nocb_follower_head);
 
        /* Having no rcuo kthread but CBs after scheduler starts is bad! */
-       if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp &&
+       if (!READ_ONCE(rdp->nocb_kthread) && rhp &&
            rcu_scheduler_fully_active) {
                /* RCU callback enqueued before CPU first came online??? */
                pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
@@ -1869,12 +1840,12 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
        atomic_long_add(rhcount, &rdp->nocb_q_count);
        /* rcu_barrier() relies on ->nocb_q_count add before xchg. */
        old_rhpp = xchg(&rdp->nocb_tail, rhtp);
-       ACCESS_ONCE(*old_rhpp) = rhp;
+       WRITE_ONCE(*old_rhpp, rhp);
        atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
        smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
 
        /* If we are not being polled and there is a kthread, awaken it ... */
-       t = ACCESS_ONCE(rdp->nocb_kthread);
+       t = READ_ONCE(rdp->nocb_kthread);
        if (rcu_nocb_poll || !t) {
                trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
                                    TPS("WakeNotPoll"));
@@ -2012,7 +1983,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
        for (;;) {
                swait_event_interruptible(
                        rnp->nocb_gp_wq[c & 0x1],
-                       (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
+                       (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
                if (likely(d))
                        break;
                WARN_ON(signal_pending(current));
@@ -2039,7 +2010,7 @@ wait_again:
        if (!rcu_nocb_poll) {
                trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
                swait_event_interruptible(my_rdp->nocb_wq,
-                               !ACCESS_ONCE(my_rdp->nocb_leader_sleep));
+                               !READ_ONCE(my_rdp->nocb_leader_sleep));
                /* Memory barrier handled by smp_mb() calls below and repoll. */
        } else if (firsttime) {
                firsttime = false; /* Don't drown trace log with "Poll"! */
@@ -2053,12 +2024,12 @@ wait_again:
         */
        gotcbs = false;
        for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
-               rdp->nocb_gp_head = ACCESS_ONCE(rdp->nocb_head);
+               rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
                if (!rdp->nocb_gp_head)
                        continue;  /* No CBs here, try next follower. */
 
                /* Move callbacks to wait-for-GP list, which is empty. */
-               ACCESS_ONCE(rdp->nocb_head) = NULL;
+               WRITE_ONCE(rdp->nocb_head, NULL);
                rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
                gotcbs = true;
        }
@@ -2078,7 +2049,7 @@ wait_again:
                my_rdp->nocb_leader_sleep = true;
                smp_mb();  /* Ensure _sleep true before scan. */
                for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower)
-                       if (ACCESS_ONCE(rdp->nocb_head)) {
+                       if (READ_ONCE(rdp->nocb_head)) {
                                /* Found CB, so short-circuit next wait. */
                                my_rdp->nocb_leader_sleep = false;
                                break;
@@ -2099,7 +2070,7 @@ wait_again:
 
        /* Each pass through the following loop wakes a follower, if needed. */
        for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
-               if (ACCESS_ONCE(rdp->nocb_head))
+               if (READ_ONCE(rdp->nocb_head))
                        my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/
                if (!rdp->nocb_gp_head)
                        continue; /* No CBs, so no need to wake follower. */
@@ -2113,7 +2084,7 @@ wait_again:
                         * List was empty, wake up the follower.
                         * Memory barriers supplied by atomic_long_add().
                         */
-                       swait_wake(&rdp->nocb_wq);
+                       swake_up(&rdp->nocb_wq);
                }
        }
 
@@ -2135,7 +2106,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
                        trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
                                            "FollowerSleep");
                        swait_event_interruptible(rdp->nocb_wq,
-                                                ACCESS_ONCE(rdp->nocb_follower_head));
+                                                READ_ONCE(rdp->nocb_follower_head));
                } else if (firsttime) {
                        /* Don't drown trace log with "Poll"! */
                        firsttime = false;
@@ -2176,10 +2147,10 @@ static int rcu_nocb_kthread(void *arg)
                        nocb_follower_wait(rdp);
 
                /* Pull the ready-to-invoke callbacks onto local list. */
-               list = ACCESS_ONCE(rdp->nocb_follower_head);
+               list = READ_ONCE(rdp->nocb_follower_head);
                BUG_ON(!list);
                trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty");
-               ACCESS_ONCE(rdp->nocb_follower_head) = NULL;
+               WRITE_ONCE(rdp->nocb_follower_head, NULL);
                tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head);
 
                /* Each pass through the following loop invokes a callback. */
@@ -2218,7 +2189,7 @@ static int rcu_nocb_kthread(void *arg)
 /* Is a deferred wakeup of rcu_nocb_kthread() required? */
 static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
 {
-       return ACCESS_ONCE(rdp->nocb_defer_wakeup);
+       return READ_ONCE(rdp->nocb_defer_wakeup);
 }
 
 /* Do a deferred wakeup of rcu_nocb_kthread(). */
@@ -2228,8 +2199,8 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
 
        if (!rcu_nocb_need_deferred_wakeup(rdp))
                return;
-       ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup);
-       ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT;
+       ndw = READ_ONCE(rdp->nocb_defer_wakeup);
+       WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_NOT);
        wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
        trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
 }
@@ -2293,7 +2264,7 @@ void __init rcu_init_nohz(void)
 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
        rdp->nocb_tail = &rdp->nocb_head;
-       init_swait_head(&rdp->nocb_wq);
+       init_swait_queue_head(&rdp->nocb_wq);
        rdp->nocb_follower_tail = &rdp->nocb_follower_head;
 }
 
@@ -2342,7 +2313,7 @@ static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)
        t = kthread_run(rcu_nocb_kthread, rdp_spawn,
                        "rcuo%c/%d", rsp->abbr, cpu);
        BUG_ON(IS_ERR(t));
-       ACCESS_ONCE(rdp_spawn->nocb_kthread) = t;
+       WRITE_ONCE(rdp_spawn->nocb_kthread, t);
 }
 
 /*
@@ -2443,7 +2414,7 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
        return false;
 }
 
-static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
 {
 }
 
@@ -2451,6 +2422,11 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
 {
 }
 
+static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
+{
+       return NULL;
+}
+
 static void rcu_init_one_nocb(struct rcu_node *rnp)
 {
 }
@@ -2557,7 +2533,7 @@ static void rcu_sysidle_enter(int irq)
 
        /* Record start of fully idle period. */
        j = jiffies;
-       ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j;
+       WRITE_ONCE(rdtp->dynticks_idle_jiffies, j);
        smp_mb__before_atomic();
        atomic_inc(&rdtp->dynticks_idle);
        smp_mb__after_atomic();
@@ -2575,7 +2551,7 @@ static void rcu_sysidle_enter(int irq)
  */
 void rcu_sysidle_force_exit(void)
 {
-       int oldstate = ACCESS_ONCE(full_sysidle_state);
+       int oldstate = READ_ONCE(full_sysidle_state);
        int newoldstate;
 
        /*
@@ -2688,7 +2664,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
        smp_mb(); /* Read counters before timestamps. */
 
        /* Pick up timestamps. */
-       j = ACCESS_ONCE(rdtp->dynticks_idle_jiffies);
+       j = READ_ONCE(rdtp->dynticks_idle_jiffies);
        /* If this CPU entered idle more recently, update maxj timestamp. */
        if (ULONG_CMP_LT(*maxj, j))
                *maxj = j;
@@ -2725,11 +2701,11 @@ static unsigned long rcu_sysidle_delay(void)
 static void rcu_sysidle(unsigned long j)
 {
        /* Check the current state. */
-       switch (ACCESS_ONCE(full_sysidle_state)) {
+       switch (READ_ONCE(full_sysidle_state)) {
        case RCU_SYSIDLE_NOT:
 
                /* First time all are idle, so note a short idle period. */
-               ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_SHORT;
+               WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_SHORT);
                break;
 
        case RCU_SYSIDLE_SHORT:
@@ -2767,7 +2743,7 @@ static void rcu_sysidle_cancel(void)
 {
        smp_mb();
        if (full_sysidle_state > RCU_SYSIDLE_SHORT)
-               ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT;
+               WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_NOT);
 }
 
 /*
@@ -2819,7 +2795,7 @@ static void rcu_sysidle_cb(struct rcu_head *rhp)
        smp_mb();  /* grace period precedes setting inuse. */
 
        rshp = container_of(rhp, struct rcu_sysidle_head, rh);
-       ACCESS_ONCE(rshp->inuse) = 0;
+       WRITE_ONCE(rshp->inuse, 0);
 }
 
 /*
@@ -2830,7 +2806,7 @@ static void rcu_sysidle_cb(struct rcu_head *rhp)
 bool rcu_sys_is_idle(void)
 {
        static struct rcu_sysidle_head rsh;
-       int rss = ACCESS_ONCE(full_sysidle_state);
+       int rss = READ_ONCE(full_sysidle_state);
 
        if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
                return false;
@@ -2858,7 +2834,7 @@ bool rcu_sys_is_idle(void)
                        }
                        rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
                        oldrss = rss;
-                       rss = ACCESS_ONCE(full_sysidle_state);
+                       rss = READ_ONCE(full_sysidle_state);
                }
        }
 
@@ -2942,10 +2918,10 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
 #ifdef CONFIG_NO_HZ_FULL
        if (tick_nohz_full_cpu(smp_processor_id()) &&
            (!rcu_gp_in_progress(rsp) ||
-            ULONG_CMP_LT(jiffies, ACCESS_ONCE(rsp->gp_start) + HZ)))
-               return 1;
+            ULONG_CMP_LT(jiffies, READ_ONCE(rsp->gp_start) + HZ)))
+               return true;
 #endif /* #ifdef CONFIG_NO_HZ_FULL */
-       return 0;
+       return false;
 }
 
 /*
@@ -2971,7 +2947,7 @@ static void rcu_bind_gp_kthread(void)
 static void rcu_dynticks_task_enter(void)
 {
 #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
-       ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
+       WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
 #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
 }
 
@@ -2979,6 +2955,6 @@ static void rcu_dynticks_task_enter(void)
 static void rcu_dynticks_task_exit(void)
 {
 #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
-       ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
+       WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
 #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
 }