X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=kernel%2Fkernel%2Frcu%2Ftree_plugin.h;fp=kernel%2Fkernel%2Frcu%2Ftree_plugin.h;h=8e119cf647ba64148bfefc37443c8eb342676ceb;hb=e09b41010ba33a20a87472ee821fa407a5b8da36;hp=54da8f44d586616825443501a0e79aba9c70153e;hpb=f93b97fd65072de626c074dbe099a1fff05ce060;p=kvmfornfv.git diff --git a/kernel/kernel/rcu/tree_plugin.h b/kernel/kernel/rcu/tree_plugin.h index 54da8f44d..8e119cf64 100644 --- a/kernel/kernel/rcu/tree_plugin.h +++ b/kernel/kernel/rcu/tree_plugin.h @@ -28,7 +28,17 @@ #include "../locking/rtmutex_common.h" -#endif /* #ifdef CONFIG_RCU_BOOST */ +#else /* #ifdef CONFIG_RCU_BOOST */ + +/* + * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST, + * all uses are in dead code. Provide a definition to keep the compiler + * happy, but add WARN_ON_ONCE() to complain if used in the wrong place. + * This probably needs to be excluded from -rt builds. + */ +#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; }) + +#endif /* #else #ifdef CONFIG_RCU_BOOST */ /* * Control variables for per-CPU and per-rcu_node kthreads. These @@ -53,11 +63,11 @@ static void __init rcu_bootup_announce_oddness(void) { if (IS_ENABLED(CONFIG_RCU_TRACE)) pr_info("\tRCU debugfs-based tracing is enabled.\n"); - if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || - (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)) + if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) || + (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32)) pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", - CONFIG_RCU_FANOUT); - if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) + RCU_FANOUT); + if (rcu_fanout_exact) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); @@ -65,14 +75,12 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tRCU lockdep checking is enabled.\n"); if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_RUNNABLE)) pr_info("\tRCU torture testing starts during boot.\n"); - if (IS_ENABLED(CONFIG_RCU_CPU_STALL_INFO)) - pr_info("\tAdditional per-CPU info printed with stalls.\n"); - if (NUM_RCU_LVL_4 != 0) - pr_info("\tFour-level hierarchy is enabled.\n"); - if (CONFIG_RCU_FANOUT_LEAF != 16) + if (RCU_NUM_LVLS >= 4) + pr_info("\tFour(or more)-level hierarchy is enabled.\n"); + if (RCU_FANOUT_LEAF != 16) pr_info("\tBuild-time adjustment of leaf fanout to %d.\n", - CONFIG_RCU_FANOUT_LEAF); - if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) + RCU_FANOUT_LEAF); + if (rcu_fanout_leaf != RCU_FANOUT_LEAF) pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); if (nr_cpu_ids != NR_CPUS) pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); @@ -83,9 +91,9 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_PREEMPT_RCU RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); -static struct rcu_state *rcu_state_p = &rcu_preempt_state; +static struct rcu_state *const rcu_state_p = &rcu_preempt_state; +static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data; -static int rcu_preempted_readers_exp(struct rcu_node *rnp); static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, bool wake); @@ -98,6 +106,147 @@ static void __init rcu_bootup_announce(void) rcu_bootup_announce_oddness(); } +/* Flags for rcu_preempt_ctxt_queue() decision table. 
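+ * Each flag records one aspect of the rcu_node/rcu_data state at the
+ * time a task is preempted; together the four bits select among the
+ * cases of the switch statement in rcu_preempt_ctxt_queue() below.
+ * For example, blkd_state == RCU_GP_BLKD + RCU_EXP_BLKD (first task
+ * to block, with a quiescent state still owed to both grace periods)
+ * queues at the tail of ->blkd_tasks.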
*/ +#define RCU_GP_TASKS 0x8 +#define RCU_EXP_TASKS 0x4 +#define RCU_GP_BLKD 0x2 +#define RCU_EXP_BLKD 0x1 + +/* + * Queues a task preempted within an RCU-preempt read-side critical + * section into the appropriate location within the ->blkd_tasks list, + * depending on the states of any ongoing normal and expedited grace + * periods. The ->gp_tasks pointer indicates which element the normal + * grace period is waiting on (NULL if none), and the ->exp_tasks pointer + * indicates which element the expedited grace period is waiting on (again, + * NULL if none). If a grace period is waiting on a given element in the + * ->blkd_tasks list, it also waits on all subsequent elements. Thus, + * adding a task to the tail of the list blocks any grace period that is + * already waiting on one of the elements. In contrast, adding a task + * to the head of the list won't block any grace period that is already + * waiting on one of the elements. + * + * This queuing is imprecise, and can sometimes make an ongoing grace + * period wait for a task that is not strictly speaking blocking it. + * Given the choice, we needlessly block a normal grace period rather than + * blocking an expedited grace period. + * + * Note that an endless sequence of expedited grace periods still cannot + * indefinitely postpone a normal grace period. Eventually, all of the + * fixed number of preempted tasks blocking the normal grace period that are + * not also blocking the expedited grace period will resume and complete + * their RCU read-side critical sections. At that point, the ->gp_tasks + * pointer will equal the ->exp_tasks pointer, at which point the end of + * the corresponding expedited grace period will also be the end of the + * normal grace period. + */ +static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp, + unsigned long flags) __releases(rnp->lock) +{ + int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) + + (rnp->exp_tasks ? RCU_EXP_TASKS : 0) + + (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) + + (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0); + struct task_struct *t = current; + + /* + * Decide where to queue the newly blocked task. In theory, + * this could be an if-statement. In practice, when I tried + * that, it was quite messy. + */ + switch (blkd_state) { + case 0: + case RCU_EXP_TASKS: + case RCU_EXP_TASKS + RCU_GP_BLKD: + case RCU_GP_TASKS: + case RCU_GP_TASKS + RCU_EXP_TASKS: + + /* + * Blocking neither GP, or first task blocking the normal + * GP but not blocking the already-waiting expedited GP. + * Queue at the head of the list to avoid unnecessarily + * blocking the already-waiting GPs. + */ + list_add(&t->rcu_node_entry, &rnp->blkd_tasks); + break; + + case RCU_EXP_BLKD: + case RCU_GP_BLKD: + case RCU_GP_BLKD + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD: + + /* + * First task arriving that blocks either GP, or first task + * arriving that blocks the expedited GP (with the normal + * GP already waiting), or a task arriving that blocks + * both GPs with both GPs already waiting. Queue at the + * tail of the list to avoid any GP waiting on any of the + * already queued tasks that are not blocking it. 
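+	 * If the corresponding ->gp_tasks or ->exp_tasks pointer is
+	 * still NULL, it is pointed at this newly queued task just
+	 * after the switch statement.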
+ */ + list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks); + break; + + case RCU_EXP_TASKS + RCU_EXP_BLKD: + case RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_EXP_BLKD: + + /* + * Second or subsequent task blocking the expedited GP. + * The task either does not block the normal GP, or is the + * first task blocking the normal GP. Queue just after + * the first task blocking the expedited GP. + */ + list_add(&t->rcu_node_entry, rnp->exp_tasks); + break; + + case RCU_GP_TASKS + RCU_GP_BLKD: + case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD: + + /* + * Second or subsequent task blocking the normal GP. + * The task does not block the expedited GP. Queue just + * after the first task blocking the normal GP. + */ + list_add(&t->rcu_node_entry, rnp->gp_tasks); + break; + + default: + + /* Yet another exercise in excessive paranoia. */ + WARN_ON_ONCE(1); + break; + } + + /* + * We have now queued the task. If it was the first one to + * block either grace period, update the ->gp_tasks and/or + * ->exp_tasks pointers, respectively, to reference the newly + * blocked tasks. + */ + if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) + rnp->gp_tasks = &t->rcu_node_entry; + if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) + rnp->exp_tasks = &t->rcu_node_entry; + raw_spin_unlock(&rnp->lock); + + /* + * Report the quiescent state for the expedited GP. This expedited + * GP should not be able to end until we report, so there should be + * no need to check for a subsequent expedited GP. (Though we are + * still in a quiescent state in any case.) + */ + if (blkd_state & RCU_EXP_BLKD && + t->rcu_read_unlock_special.b.exp_need_qs) { + t->rcu_read_unlock_special.b.exp_need_qs = false; + rcu_report_exp_rdp(rdp->rsp, rdp, true); + } else { + WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs); + } + local_irq_restore(flags); +} + /* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -109,11 +258,11 @@ static void __init rcu_bootup_announce(void) */ static void rcu_preempt_qs(void) { - if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) { + if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) { trace_rcu_grace_period(TPS("rcu_preempt"), - __this_cpu_read(rcu_preempt_data.gpnum), + __this_cpu_read(rcu_data_p->gpnum), TPS("cpuqs")); - __this_cpu_write(rcu_preempt_data.passed_quiesce, 1); + __this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false); barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ current->rcu_read_unlock_special.b.need_qs = false; } @@ -143,7 +292,7 @@ static void rcu_preempt_note_context_switch(void) !t->rcu_read_unlock_special.b.blocked) { /* Possibly blocking in an RCU read-side critical section. */ - rdp = this_cpu_ptr(rcu_preempt_state.rda); + rdp = this_cpu_ptr(rcu_state_p->rda); rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); @@ -151,43 +300,18 @@ static void rcu_preempt_note_context_switch(void) t->rcu_blocked_node = rnp; /* - * If this CPU has already checked in, then this task - * will hold up the next grace period rather than the - * current grace period. Queue the task accordingly. - * If the task is queued for the current grace period - * (i.e., this CPU has not yet passed through a quiescent - * state for the current grace period), then as long - * as that task remains queued, the current grace period - * cannot end. 
Note that there is some uncertainty as - * to exactly when the current grace period started. - * We take a conservative approach, which can result - * in unnecessarily waiting on tasks that started very - * slightly after the current grace period began. C'est - * la vie!!! - * - * But first, note that the current CPU must still be - * on line! + * Verify the CPU's sanity, trace the preemption, and + * then queue the task as required based on the states + * of any ongoing and expedited grace periods. */ WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0); WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); - if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { - list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); - rnp->gp_tasks = &t->rcu_node_entry; -#ifdef CONFIG_RCU_BOOST - if (rnp->boost_tasks != NULL) - rnp->boost_tasks = rnp->gp_tasks; -#endif /* #ifdef CONFIG_RCU_BOOST */ - } else { - list_add(&t->rcu_node_entry, &rnp->blkd_tasks); - if (rnp->qsmask & rdp->grpmask) - rnp->gp_tasks = &t->rcu_node_entry; - } trace_rcu_preempt_task(rdp->rsp->name, t->pid, (rnp->qsmask & rdp->grpmask) ? rnp->gpnum : rnp->gpnum + 1); - raw_spin_unlock_irqrestore(&rnp->lock, flags); + rcu_preempt_ctxt_queue(rnp, rdp, flags); } else if (t->rcu_read_lock_nesting < 0 && t->rcu_read_unlock_special.s) { @@ -256,9 +380,8 @@ void rcu_read_unlock_special(struct task_struct *t) bool empty_exp_now; unsigned long flags; struct list_head *np; -#ifdef CONFIG_RCU_BOOST bool drop_boost_mutex = false; -#endif /* #ifdef CONFIG_RCU_BOOST */ + struct rcu_data *rdp; struct rcu_node *rnp; union rcu_special special; @@ -269,8 +392,8 @@ void rcu_read_unlock_special(struct task_struct *t) local_irq_save(flags); /* - * If RCU core is waiting for this CPU to exit critical section, - * let it know that we have done so. Because irqs are disabled, + * If RCU core is waiting for this CPU to exit its critical section, + * report the fact that it has exited. Because irqs are disabled, * t->rcu_read_unlock_special cannot change. */ special = t->rcu_read_unlock_special; @@ -283,13 +406,32 @@ void rcu_read_unlock_special(struct task_struct *t) } } + /* + * Respond to a request for an expedited grace period, but only if + * we were not preempted, meaning that we were running on the same + * CPU throughout. If we were preempted, the exp_need_qs flag + * would have been cleared at the time of the first preemption, + * and the quiescent state would be reported when we were dequeued. + */ + if (special.b.exp_need_qs) { + WARN_ON_ONCE(special.b.blocked); + t->rcu_read_unlock_special.b.exp_need_qs = false; + rdp = this_cpu_ptr(rcu_state_p->rda); + rcu_report_exp_rdp(rcu_state_p, rdp, true); + if (!t->rcu_read_unlock_special.s) { + local_irq_restore(flags); + return; + } + } + /* Hardware IRQ handlers cannot block, complain if they get here. */ if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) { lockdep_rcu_suspicious(__FILE__, __LINE__, "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); - pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n", + pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n", t->rcu_read_unlock_special.s, t->rcu_read_unlock_special.b.blocked, + t->rcu_read_unlock_special.b.exp_need_qs, t->rcu_read_unlock_special.b.need_qs); local_irq_restore(flags); return; @@ -300,9 +442,11 @@ void rcu_read_unlock_special(struct task_struct *t) t->rcu_read_unlock_special.b.blocked = false; /* - * Remove this task from the list it blocked on. 
The - * task can migrate while we acquire the lock, but at - * most one time. So at most two passes through loop. + * Remove this task from the list it blocked on. The task + * now remains queued on the rcu_node corresponding to + * the CPU it first blocked on, so the first attempt to + * acquire the task's rcu_node's ->lock will succeed. + * Keep the loop and add a WARN_ON() out of sheer paranoia. */ for (;;) { rnp = t->rcu_blocked_node; @@ -310,10 +454,11 @@ void rcu_read_unlock_special(struct task_struct *t) smp_mb__after_unlock_lock(); if (rnp == t->rcu_blocked_node) break; + WARN_ON_ONCE(1); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); - empty_exp = !rcu_preempted_readers_exp(rnp); + empty_exp = sync_rcu_preempt_exp_done(rnp); smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ np = rcu_next_node_entry(t, rnp); list_del_init(&t->rcu_node_entry); @@ -324,12 +469,12 @@ void rcu_read_unlock_special(struct task_struct *t) rnp->gp_tasks = np; if (&t->rcu_node_entry == rnp->exp_tasks) rnp->exp_tasks = np; -#ifdef CONFIG_RCU_BOOST - if (&t->rcu_node_entry == rnp->boost_tasks) - rnp->boost_tasks = np; - /* Snapshot ->boost_mtx ownership with rcu_node lock held. */ - drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; -#endif /* #ifdef CONFIG_RCU_BOOST */ + if (IS_ENABLED(CONFIG_RCU_BOOST)) { + if (&t->rcu_node_entry == rnp->boost_tasks) + rnp->boost_tasks = np; + /* Snapshot ->boost_mtx ownership w/rnp->lock held. */ + drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; + } /* * If this was the last task on the current list, and if @@ -337,7 +482,7 @@ void rcu_read_unlock_special(struct task_struct *t) * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, * so we must take a snapshot of the expedited state. */ - empty_exp_now = !rcu_preempted_readers_exp(rnp); + empty_exp_now = sync_rcu_preempt_exp_done(rnp); if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) { trace_rcu_quiescent_state_report(TPS("preempt_rcu"), rnp->gpnum, @@ -346,24 +491,21 @@ void rcu_read_unlock_special(struct task_struct *t) rnp->grplo, rnp->grphi, !!rnp->gp_tasks); - rcu_report_unblock_qs_rnp(&rcu_preempt_state, - rnp, flags); + rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags); } else { raw_spin_unlock_irqrestore(&rnp->lock, flags); } -#ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ - if (drop_boost_mutex) + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) rt_mutex_unlock(&rnp->boost_mtx); -#endif /* #ifdef CONFIG_RCU_BOOST */ /* * If this was the last task on the expedited lists, * then we need to report up the rcu_node hierarchy. 
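	 * Note that empty_exp was sampled above, before this task was
	 * dequeued, so the !empty_exp && empty_exp_now transition below
	 * identifies this task as the last expedited blocker on this rnp.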
*/ if (!empty_exp && empty_exp_now) - rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); + rcu_report_exp_rnp(rcu_state_p, rnp, true); } else { local_irq_restore(flags); } @@ -383,7 +525,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - t = list_entry(rnp->gp_tasks, + t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) sched_show_task(t); @@ -403,8 +545,6 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) rcu_print_detail_task_stall_rnp(rnp); } -#ifdef CONFIG_RCU_CPU_STALL_INFO - static void rcu_print_task_stall_begin(struct rcu_node *rnp) { pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", @@ -416,18 +556,6 @@ static void rcu_print_task_stall_end(void) pr_cont("\n"); } -#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ - -static void rcu_print_task_stall_begin(struct rcu_node *rnp) -{ -} - -static void rcu_print_task_stall_end(void) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ - /* * Scan the current list of tasks blocked within RCU read-side critical * sections, printing out the tid of each. @@ -440,7 +568,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp) if (!rcu_preempt_blocked_readers_cgp(rnp)) return 0; rcu_print_task_stall_begin(rnp); - t = list_entry(rnp->gp_tasks, + t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { pr_cont(" P%d", t->pid); @@ -450,6 +578,27 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return ndetected; } +/* + * Scan the current list of tasks blocked within RCU read-side critical + * sections, printing out the tid of each that is blocking the current + * expedited grace period. + */ +static int rcu_print_task_exp_stall(struct rcu_node *rnp) +{ + struct task_struct *t; + int ndetected = 0; + + if (!rnp->exp_tasks) + return 0; + t = list_entry(rnp->exp_tasks->prev, + struct task_struct, rcu_node_entry); + list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { + pr_cont(" P%d", t->pid); + ndetected++; + } + return ndetected; +} + /* * Check that the list of blocked tasks for the newly completed grace * period is in fact empty. It is a serious bug to complete a grace @@ -484,17 +633,17 @@ static void rcu_preempt_check_callbacks(void) return; } if (t->rcu_read_lock_nesting > 0 && - __this_cpu_read(rcu_preempt_data.qs_pending) && - !__this_cpu_read(rcu_preempt_data.passed_quiesce)) + __this_cpu_read(rcu_data_p->core_needs_qs) && + __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm)) t->rcu_read_unlock_special.b.need_qs = true; } /* * Queue a preemptible-RCU callback for invocation after a grace period. 
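 * (The rcu_callback_t in the new signature is a typedef for
 * void (*)(struct rcu_head *), so the function type is unchanged.)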
*/ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu(struct rcu_head *head, rcu_callback_t func) { - __call_rcu(head, func, &rcu_preempt_state, -1, 0); + __call_rcu(head, func, rcu_state_p, -1, 0); } EXPORT_SYMBOL_GPL(call_rcu); @@ -514,10 +663,10 @@ EXPORT_SYMBOL_GPL(call_rcu); */ void synchronize_rcu(void) { - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && - !lock_is_held(&rcu_lock_map) && - !lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_rcu() in RCU read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_rcu() in RCU read-side critical section"); if (!rcu_scheduler_active) return; if (rcu_gp_is_expedited()) @@ -527,157 +676,41 @@ void synchronize_rcu(void) } EXPORT_SYMBOL_GPL(synchronize_rcu); -static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); -static unsigned long sync_rcu_preempt_exp_count; -static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); - -/* - * Return non-zero if there are any tasks in RCU read-side critical - * sections blocking the current preemptible-RCU expedited grace period. - * If there is no preemptible-RCU expedited grace period currently in - * progress, returns zero unconditionally. - */ -static int rcu_preempted_readers_exp(struct rcu_node *rnp) -{ - return rnp->exp_tasks != NULL; -} - -/* - * return non-zero if there is no RCU expedited grace period in progress - * for the specified rcu_node structure, in other words, if all CPUs and - * tasks covered by the specified rcu_node structure have done their bit - * for the current expedited grace period. Works only for preemptible - * RCU -- other RCU implementation use other means. - * - * Caller must hold sync_rcu_preempt_exp_mutex. - */ -static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) -{ - return !rcu_preempted_readers_exp(rnp) && - ACCESS_ONCE(rnp->expmask) == 0; -} - -/* - * Report the exit from RCU read-side critical section for the last task - * that queued itself during or before the current expedited preemptible-RCU - * grace period. This event is reported either to the rcu_node structure on - * which the task was queued or to one of that rcu_node structure's ancestors, - * recursively up the tree. (Calm down, calm down, we do the recursion - * iteratively!) - * - * Caller must hold sync_rcu_preempt_exp_mutex. - */ -static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, - bool wake) -{ - unsigned long flags; - unsigned long mask; - - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); - for (;;) { - if (!sync_rcu_preempt_exp_done(rnp)) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); - break; - } - if (rnp->parent == NULL) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); - if (wake) { - smp_mb(); /* EGP done before wake_up(). */ - wake_up(&sync_rcu_preempt_exp_wq); - } - break; - } - mask = rnp->grpmask; - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ - rnp = rnp->parent; - raw_spin_lock(&rnp->lock); /* irqs already disabled */ - smp_mb__after_unlock_lock(); - rnp->expmask &= ~mask; - } -} - -/* - * Snapshot the tasks blocking the newly started preemptible-RCU expedited - * grace period for the specified rcu_node structure, phase 1. If there - * are such tasks, set the ->expmask bits up the rcu_node tree and also - * set the ->expmask bits on the leaf rcu_node structures to tell phase 2 - * that work is needed here. 
- * - * Caller must hold sync_rcu_preempt_exp_mutex. - */ -static void -sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp) -{ - unsigned long flags; - unsigned long mask; - struct rcu_node *rnp_up; - - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); - WARN_ON_ONCE(rnp->expmask); - WARN_ON_ONCE(rnp->exp_tasks); - if (!rcu_preempt_has_tasks(rnp)) { - /* No blocked tasks, nothing to do. */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; - } - /* Call for Phase 2 and propagate ->expmask bits up the tree. */ - rnp->expmask = 1; - rnp_up = rnp; - while (rnp_up->parent) { - mask = rnp_up->grpmask; - rnp_up = rnp_up->parent; - if (rnp_up->expmask & mask) - break; - raw_spin_lock(&rnp_up->lock); /* irqs already off */ - smp_mb__after_unlock_lock(); - rnp_up->expmask |= mask; - raw_spin_unlock(&rnp_up->lock); /* irqs still off */ - } - raw_spin_unlock_irqrestore(&rnp->lock, flags); -} - /* - * Snapshot the tasks blocking the newly started preemptible-RCU expedited - * grace period for the specified rcu_node structure, phase 2. If the - * leaf rcu_node structure has its ->expmask field set, check for tasks. - * If there are some, clear ->expmask and set ->exp_tasks accordingly, - * then initiate RCU priority boosting. Otherwise, clear ->expmask and - * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits, - * enabling rcu_read_unlock_special() to do the bit-clearing. - * - * Caller must hold sync_rcu_preempt_exp_mutex. + * Remote handler for smp_call_function_single(). If there is an + * RCU read-side critical section in effect, request that the + * next rcu_read_unlock() record the quiescent state up the + * ->expmask fields in the rcu_node tree. Otherwise, immediately + * report the quiescent state. */ -static void -sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp) +static void sync_rcu_exp_handler(void *info) { - unsigned long flags; - - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); - if (!rnp->expmask) { - /* Phase 1 didn't do anything, so Phase 2 doesn't either. */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; - } - - /* Phase 1 is over. */ - rnp->expmask = 0; + struct rcu_data *rdp; + struct rcu_state *rsp = info; + struct task_struct *t = current; /* - * If there are still blocked tasks, set up ->exp_tasks so that - * rcu_read_unlock_special() will wake us and then boost them. + * Within an RCU read-side critical section, request that the next + * rcu_read_unlock() report. Unless this RCU read-side critical + * section has already blocked, in which case it is already set + * up for the expedited grace period to wait on it. */ - if (rcu_preempt_has_tasks(rnp)) { - rnp->exp_tasks = rnp->blkd_tasks.next; - rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ + if (t->rcu_read_lock_nesting > 0 && + !t->rcu_read_unlock_special.b.blocked) { + t->rcu_read_unlock_special.b.exp_need_qs = true; return; } - /* No longer any blocked tasks, so undo bit setting. */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); - rcu_report_exp_rnp(rsp, rnp, false); + /* + * We are either exiting an RCU read-side critical section (negative + * values of t->rcu_read_lock_nesting) or are not in one at all + * (zero value of t->rcu_read_lock_nesting). Or we are in an RCU + * read-side critical section that blocked before this expedited + * grace period started. Either way, we can immediately report + * the quiescent state. 
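+	 * (Interrupts are disabled in this smp_call_function_single()
+	 * handler, so the state sampled above cannot change beneath us.)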
+ */ + rdp = this_cpu_ptr(rsp->rda); + rcu_report_exp_rdp(rsp, rdp, true); } /** @@ -695,81 +728,28 @@ sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp) void synchronize_rcu_expedited(void) { struct rcu_node *rnp; - struct rcu_state *rsp = &rcu_preempt_state; - unsigned long snap; - int trycount = 0; + struct rcu_node *rnp_unlock; + struct rcu_state *rsp = rcu_state_p; + unsigned long s; - smp_mb(); /* Caller's modifications seen first by other CPUs. */ - snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; - smp_mb(); /* Above access cannot bleed into critical section. */ + s = rcu_exp_gp_seq_snap(rsp); - /* - * Block CPU-hotplug operations. This means that any CPU-hotplug - * operation that finds an rcu_node structure with tasks in the - * process of being boosted will know that all tasks blocking - * this expedited grace period will already be in the process of - * being boosted. This simplifies the process of moving tasks - * from leaf to root rcu_node structures. - */ - if (!try_get_online_cpus()) { - /* CPU-hotplug operation in flight, fall back to normal GP. */ - wait_rcu_gp(call_rcu); - return; - } + rnp_unlock = exp_funnel_lock(rsp, s); + if (rnp_unlock == NULL) + return; /* Someone else did our work for us. */ - /* - * Acquire lock, falling back to synchronize_rcu() if too many - * lock-acquisition failures. Of course, if someone does the - * expedited grace period for us, just leave. - */ - while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { - if (ULONG_CMP_LT(snap, - ACCESS_ONCE(sync_rcu_preempt_exp_count))) { - put_online_cpus(); - goto mb_ret; /* Others did our work for us. */ - } - if (trycount++ < 10) { - udelay(trycount * num_online_cpus()); - } else { - put_online_cpus(); - wait_rcu_gp(call_rcu); - return; - } - } - if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) { - put_online_cpus(); - goto unlock_mb_ret; /* Others did our work for us. */ - } + rcu_exp_gp_seq_start(rsp); - /* force all RCU readers onto ->blkd_tasks lists. */ - synchronize_sched_expedited(); - - /* - * Snapshot current state of ->blkd_tasks lists into ->expmask. - * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special() - * to start clearing them. Doing this in one phase leads to - * strange races between setting and clearing bits, so just say "no"! - */ - rcu_for_each_leaf_node(rsp, rnp) - sync_rcu_preempt_exp_init1(rsp, rnp); - rcu_for_each_leaf_node(rsp, rnp) - sync_rcu_preempt_exp_init2(rsp, rnp); - - put_online_cpus(); + /* Initialize the rcu_node tree in preparation for the wait. */ + sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler); /* Wait for snapshotted ->blkd_tasks lists to drain. */ rnp = rcu_get_root(rsp); - wait_event(sync_rcu_preempt_exp_wq, - sync_rcu_preempt_exp_done(rnp)); + synchronize_sched_expedited_wait(rsp); /* Clean up and exit. */ - smp_mb(); /* ensure expedited GP seen before counter increment. */ - ACCESS_ONCE(sync_rcu_preempt_exp_count) = - sync_rcu_preempt_exp_count + 1; -unlock_mb_ret: - mutex_unlock(&sync_rcu_preempt_exp_mutex); -mb_ret: - smp_mb(); /* ensure subsequent action seen after grace period. 
*/ + rcu_exp_gp_seq_end(rsp); + mutex_unlock(&rnp_unlock->exp_funnel_mutex); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); @@ -783,7 +763,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); */ void rcu_barrier(void) { - _rcu_barrier(&rcu_preempt_state); + _rcu_barrier(rcu_state_p); } EXPORT_SYMBOL_GPL(rcu_barrier); @@ -792,7 +772,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier); */ static void __init __rcu_init_preempt(void) { - rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); + rcu_init_one(rcu_state_p, rcu_data_p); } /* @@ -815,7 +795,8 @@ void exit_rcu(void) #else /* #ifdef CONFIG_PREEMPT_RCU */ -static struct rcu_state *rcu_state_p = &rcu_sched_state; +static struct rcu_state *const rcu_state_p = &rcu_sched_state; +static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data; /* * Tell them what RCU they are running. @@ -868,6 +849,16 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return 0; } +/* + * Because preemptible RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections that are + * blocking the current expedited grace period. + */ +static int rcu_print_task_exp_stall(struct rcu_node *rnp) +{ + return 0; +} + /* * Because there is no preemptible RCU, there can be no readers blocked, * so there is no need to check for blocked tasks. So check only for @@ -981,8 +972,8 @@ static int rcu_boost(struct rcu_node *rnp) struct task_struct *t; struct list_head *tb; - if (ACCESS_ONCE(rnp->exp_tasks) == NULL && - ACCESS_ONCE(rnp->boost_tasks) == NULL) + if (READ_ONCE(rnp->exp_tasks) == NULL && + READ_ONCE(rnp->boost_tasks) == NULL) return 0; /* Nothing left to boost. */ raw_spin_lock_irqsave(&rnp->lock, flags); @@ -1035,13 +1026,12 @@ static int rcu_boost(struct rcu_node *rnp) rt_mutex_lock(&rnp->boost_mtx); rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ - return ACCESS_ONCE(rnp->exp_tasks) != NULL || - ACCESS_ONCE(rnp->boost_tasks) != NULL; + return READ_ONCE(rnp->exp_tasks) != NULL || + READ_ONCE(rnp->boost_tasks) != NULL; } /* - * Priority-boosting kthread. One per leaf rcu_node and one for the - * root rcu_node. + * Priority-boosting kthread, one per leaf rcu_node. */ static int rcu_boost_kthread(void *arg) { @@ -1143,7 +1133,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct sched_param sp; struct task_struct *t; - if (&rcu_preempt_state != rsp) + if (rcu_state_p != rsp) return 0; if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0) @@ -1257,13 +1247,12 @@ static void rcu_prepare_kthreads(int cpu) * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs * any flavor of RCU. */ -#ifndef CONFIG_RCU_NOCB_CPU_ALL -int rcu_needs_cpu(unsigned long *delta_jiffies) +int rcu_needs_cpu(u64 basemono, u64 *nextevt) { - *delta_jiffies = ULONG_MAX; - return rcu_cpu_has_callbacks(NULL); + *nextevt = KTIME_MAX; + return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) + ? 0 : rcu_cpu_has_callbacks(NULL); } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ #endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */ #if !defined(CONFIG_RCU_FAST_NO_HZ) @@ -1324,8 +1313,6 @@ module_param(rcu_idle_gp_delay, int, 0644); static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; module_param(rcu_idle_lazy_gp_delay, int, 0644); -extern int tick_nohz_active; - /* * Try to advance callbacks for all flavors of RCU on the current CPU, but * only if it has been awhile since the last time we did so. 
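 * (In practice this means at most once per jiffy, as gated by the
 * ->last_advance_all timestamp.)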
Afterwards, @@ -1354,7 +1341,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) * callbacks not yet ready to invoke. */ if ((rdp->completed != rnp->completed || - unlikely(ACCESS_ONCE(rdp->gpwrap))) && + unlikely(READ_ONCE(rdp->gpwrap))) && rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) note_gp_changes(rsp, rdp); @@ -1374,17 +1361,22 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) * * The caller must have disabled interrupts. */ -#ifndef CONFIG_RCU_NOCB_CPU_ALL -int rcu_needs_cpu(unsigned long *dj) +int rcu_needs_cpu(u64 basemono, u64 *nextevt) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + unsigned long dj; + + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) { + *nextevt = KTIME_MAX; + return 0; + } /* Snapshot to detect later posting of non-lazy callback. */ rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; /* If no callbacks, RCU doesn't need the CPU. */ if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) { - *dj = ULONG_MAX; + *nextevt = KTIME_MAX; return 0; } @@ -1398,15 +1390,16 @@ int rcu_needs_cpu(unsigned long *dj) /* Request timer delay depending on laziness, and round. */ if (!rdtp->all_lazy) { - *dj = round_up(rcu_idle_gp_delay + jiffies, + dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies; } else { - *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; + dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; } + *nextevt = basemono + dj * TICK_NSEC; return 0; } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ #endif /* #ifndef CONFIG_PREEMPT_RT_FULL */ + /* * Prepare a CPU for idle from an RCU perspective. The first major task * is to sense whether nohz mode has been enabled or disabled via sysfs. @@ -1419,7 +1412,6 @@ int rcu_needs_cpu(unsigned long *dj) */ static void rcu_prepare_for_idle(void) { -#ifndef CONFIG_RCU_NOCB_CPU_ALL bool needwake; struct rcu_data *rdp; struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); @@ -1427,8 +1419,11 @@ static void rcu_prepare_for_idle(void) struct rcu_state *rsp; int tne; + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) + return; + /* Handle nohz enablement switches conservatively. */ - tne = ACCESS_ONCE(tick_nohz_active); + tne = READ_ONCE(tick_nohz_active); if (tne != rdtp->tick_nohz_enabled_snap) { if (rcu_cpu_has_callbacks(NULL)) invoke_rcu_core(); /* force nohz to see update. */ @@ -1474,7 +1469,6 @@ static void rcu_prepare_for_idle(void) if (needwake) rcu_gp_kthread_wake(rsp); } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* @@ -1484,12 +1478,11 @@ static void rcu_prepare_for_idle(void) */ static void rcu_cleanup_after_idle(void) { -#ifndef CONFIG_RCU_NOCB_CPU_ALL - if (rcu_is_nocb_cpu(smp_processor_id())) + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || + rcu_is_nocb_cpu(smp_processor_id())) return; if (rcu_try_advance_all_cbs()) invoke_rcu_core(); -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* @@ -1564,12 +1557,10 @@ static int rcu_oom_notify(struct notifier_block *self, */ atomic_set(&oom_callback_count, 1); - get_online_cpus(); for_each_online_cpu(cpu) { smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); cond_resched_rcu_qs(); } - put_online_cpus(); /* Unconditionally decrement: no need to wake ourselves up. 
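 * (This balances the atomic_set(&oom_callback_count, 1) above, which
 * kept the count from reaching zero while CPUs were still being
 * visited.)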
*/ atomic_dec(&oom_callback_count); @@ -1590,8 +1581,6 @@ early_initcall(rcu_register_oom_notifier); #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -#ifdef CONFIG_RCU_CPU_STALL_INFO - #ifdef CONFIG_RCU_FAST_NO_HZ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) @@ -1649,12 +1638,16 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) ticks_value = rsp->gpnum - rdp->gpnum; } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); - pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", - cpu, ticks_value, ticks_title, + pr_err("\t%d-%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", + cpu, + "O."[!!cpu_online(cpu)], + "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], + "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)], + ticks_value, ticks_title, atomic_read(&rdtp->dynticks) & 0xfff, rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), - ACCESS_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart, + READ_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart, fast_no_hz); } @@ -1680,33 +1673,6 @@ static void increment_cpu_stall_ticks(void) raw_cpu_inc(rsp->rda->ticks_this_gp); } -#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ - -static void print_cpu_stall_info_begin(void) -{ - pr_cont(" {"); -} - -static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) -{ - pr_cont(" %d", cpu); -} - -static void print_cpu_stall_info_end(void) -{ - pr_cont("} "); -} - -static void zero_cpu_stall_ticks(struct rcu_data *rdp) -{ -} - -static void increment_cpu_stall_ticks(void) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ - #ifdef CONFIG_RCU_NOCB_CPU /* @@ -1751,9 +1717,9 @@ early_param("rcu_nocb_poll", parse_rcu_nocb_poll); * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended * grace period. */ -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) { - swait_wake_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]); + swake_up_all(sq); } /* @@ -1769,10 +1735,15 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq; } +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) +{ + return &rnp->nocb_gp_wq[rnp->completed & 0x1]; +} + static void rcu_init_one_nocb(struct rcu_node *rnp) { - init_swait_head(&rnp->nocb_gp_wq[0]); - init_swait_head(&rnp->nocb_gp_wq[1]); + init_swait_queue_head(&rnp->nocb_gp_wq[0]); + init_swait_queue_head(&rnp->nocb_gp_wq[1]); } #ifndef CONFIG_RCU_NOCB_CPU_ALL @@ -1792,12 +1763,12 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force) { struct rcu_data *rdp_leader = rdp->nocb_leader; - if (!ACCESS_ONCE(rdp_leader->nocb_kthread)) + if (!READ_ONCE(rdp_leader->nocb_kthread)) return; - if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) { + if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) { /* Prior smp_mb__after_atomic() orders against prior enqueue. 
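	 * (That barrier sits in __call_rcu_nocb_enqueue(), just after
	 * the ->nocb_q_count update: "Store *old_rhpp before _wake test".)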
*/ - ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false; - swait_wake(&rdp_leader->nocb_wq); + WRITE_ONCE(rdp_leader->nocb_leader_sleep, false); + swake_up(&rdp_leader->nocb_wq); } } @@ -1828,14 +1799,14 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) ret = atomic_long_read(&rdp->nocb_q_count); #ifdef CONFIG_PROVE_RCU - rhp = ACCESS_ONCE(rdp->nocb_head); + rhp = READ_ONCE(rdp->nocb_head); if (!rhp) - rhp = ACCESS_ONCE(rdp->nocb_gp_head); + rhp = READ_ONCE(rdp->nocb_gp_head); if (!rhp) - rhp = ACCESS_ONCE(rdp->nocb_follower_head); + rhp = READ_ONCE(rdp->nocb_follower_head); /* Having no rcuo kthread but CBs after scheduler starts is bad! */ - if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp && + if (!READ_ONCE(rdp->nocb_kthread) && rhp && rcu_scheduler_fully_active) { /* RCU callback enqueued before CPU first came online??? */ pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n", @@ -1869,12 +1840,12 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, atomic_long_add(rhcount, &rdp->nocb_q_count); /* rcu_barrier() relies on ->nocb_q_count add before xchg. */ old_rhpp = xchg(&rdp->nocb_tail, rhtp); - ACCESS_ONCE(*old_rhpp) = rhp; + WRITE_ONCE(*old_rhpp, rhp); atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */ /* If we are not being polled and there is a kthread, awaken it ... */ - t = ACCESS_ONCE(rdp->nocb_kthread); + t = READ_ONCE(rdp->nocb_kthread); if (rcu_nocb_poll || !t) { trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNotPoll")); @@ -2012,7 +1983,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) for (;;) { swait_event_interruptible( rnp->nocb_gp_wq[c & 0x1], - (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); + (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c))); if (likely(d)) break; WARN_ON(signal_pending(current)); @@ -2039,7 +2010,7 @@ wait_again: if (!rcu_nocb_poll) { trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep"); swait_event_interruptible(my_rdp->nocb_wq, - !ACCESS_ONCE(my_rdp->nocb_leader_sleep)); + !READ_ONCE(my_rdp->nocb_leader_sleep)); /* Memory barrier handled by smp_mb() calls below and repoll. */ } else if (firsttime) { firsttime = false; /* Don't drown trace log with "Poll"! */ @@ -2053,12 +2024,12 @@ wait_again: */ gotcbs = false; for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { - rdp->nocb_gp_head = ACCESS_ONCE(rdp->nocb_head); + rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head); if (!rdp->nocb_gp_head) continue; /* No CBs here, try next follower. */ /* Move callbacks to wait-for-GP list, which is empty. */ - ACCESS_ONCE(rdp->nocb_head) = NULL; + WRITE_ONCE(rdp->nocb_head, NULL); rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); gotcbs = true; } @@ -2078,7 +2049,7 @@ wait_again: my_rdp->nocb_leader_sleep = true; smp_mb(); /* Ensure _sleep true before scan. */ for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) - if (ACCESS_ONCE(rdp->nocb_head)) { + if (READ_ONCE(rdp->nocb_head)) { /* Found CB, so short-circuit next wait. */ my_rdp->nocb_leader_sleep = false; break; @@ -2099,7 +2070,7 @@ wait_again: /* Each pass through the following loop wakes a follower, if needed. */ for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { - if (ACCESS_ONCE(rdp->nocb_head)) + if (READ_ONCE(rdp->nocb_head)) my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/ if (!rdp->nocb_gp_head) continue; /* No CBs, so no need to wake follower. */ @@ -2113,7 +2084,7 @@ wait_again: * List was empty, wake up the follower. 
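			 * (The follower is parked in nocb_follower_wait()
			 * until its ->nocb_follower_head becomes non-NULL.)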
* Memory barriers supplied by atomic_long_add(). */ - swait_wake(&rdp->nocb_wq); + swake_up(&rdp->nocb_wq); } } @@ -2135,7 +2106,7 @@ static void nocb_follower_wait(struct rcu_data *rdp) trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "FollowerSleep"); swait_event_interruptible(rdp->nocb_wq, - ACCESS_ONCE(rdp->nocb_follower_head)); + READ_ONCE(rdp->nocb_follower_head)); } else if (firsttime) { /* Don't drown trace log with "Poll"! */ firsttime = false; @@ -2176,10 +2147,10 @@ static int rcu_nocb_kthread(void *arg) nocb_follower_wait(rdp); /* Pull the ready-to-invoke callbacks onto local list. */ - list = ACCESS_ONCE(rdp->nocb_follower_head); + list = READ_ONCE(rdp->nocb_follower_head); BUG_ON(!list); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty"); - ACCESS_ONCE(rdp->nocb_follower_head) = NULL; + WRITE_ONCE(rdp->nocb_follower_head, NULL); tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head); /* Each pass through the following loop invokes a callback. */ @@ -2218,7 +2189,7 @@ static int rcu_nocb_kthread(void *arg) /* Is a deferred wakeup of rcu_nocb_kthread() required? */ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) { - return ACCESS_ONCE(rdp->nocb_defer_wakeup); + return READ_ONCE(rdp->nocb_defer_wakeup); } /* Do a deferred wakeup of rcu_nocb_kthread(). */ @@ -2228,8 +2199,8 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp) if (!rcu_nocb_need_deferred_wakeup(rdp)) return; - ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup); - ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT; + ndw = READ_ONCE(rdp->nocb_defer_wakeup); + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_NOT); wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); } @@ -2293,7 +2264,7 @@ void __init rcu_init_nohz(void) static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) { rdp->nocb_tail = &rdp->nocb_head; - init_swait_head(&rdp->nocb_wq); + init_swait_queue_head(&rdp->nocb_wq); rdp->nocb_follower_tail = &rdp->nocb_follower_head; } @@ -2342,7 +2313,7 @@ static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu) t = kthread_run(rcu_nocb_kthread, rdp_spawn, "rcuo%c/%d", rsp->abbr, cpu); BUG_ON(IS_ERR(t)); - ACCESS_ONCE(rdp_spawn->nocb_kthread) = t; + WRITE_ONCE(rdp_spawn->nocb_kthread, t); } /* @@ -2443,7 +2414,7 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) return false; } -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) { } @@ -2451,6 +2422,11 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) { } +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) +{ + return NULL; +} + static void rcu_init_one_nocb(struct rcu_node *rnp) { } @@ -2557,7 +2533,7 @@ static void rcu_sysidle_enter(int irq) /* Record start of fully idle period. */ j = jiffies; - ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j; + WRITE_ONCE(rdtp->dynticks_idle_jiffies, j); smp_mb__before_atomic(); atomic_inc(&rdtp->dynticks_idle); smp_mb__after_atomic(); @@ -2575,7 +2551,7 @@ static void rcu_sysidle_enter(int irq) */ void rcu_sysidle_force_exit(void) { - int oldstate = ACCESS_ONCE(full_sysidle_state); + int oldstate = READ_ONCE(full_sysidle_state); int newoldstate; /* @@ -2688,7 +2664,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, smp_mb(); /* Read counters before timestamps. */ /* Pick up timestamps. 
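	 * (->dynticks_idle_jiffies was stamped at idle entry by
	 * rcu_sysidle_enter() above.)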
*/ - j = ACCESS_ONCE(rdtp->dynticks_idle_jiffies); + j = READ_ONCE(rdtp->dynticks_idle_jiffies); /* If this CPU entered idle more recently, update maxj timestamp. */ if (ULONG_CMP_LT(*maxj, j)) *maxj = j; @@ -2725,11 +2701,11 @@ static unsigned long rcu_sysidle_delay(void) static void rcu_sysidle(unsigned long j) { /* Check the current state. */ - switch (ACCESS_ONCE(full_sysidle_state)) { + switch (READ_ONCE(full_sysidle_state)) { case RCU_SYSIDLE_NOT: /* First time all are idle, so note a short idle period. */ - ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_SHORT; + WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_SHORT); break; case RCU_SYSIDLE_SHORT: @@ -2767,7 +2743,7 @@ static void rcu_sysidle_cancel(void) { smp_mb(); if (full_sysidle_state > RCU_SYSIDLE_SHORT) - ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT; + WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_NOT); } /* @@ -2819,7 +2795,7 @@ static void rcu_sysidle_cb(struct rcu_head *rhp) smp_mb(); /* grace period precedes setting inuse. */ rshp = container_of(rhp, struct rcu_sysidle_head, rh); - ACCESS_ONCE(rshp->inuse) = 0; + WRITE_ONCE(rshp->inuse, 0); } /* @@ -2830,7 +2806,7 @@ static void rcu_sysidle_cb(struct rcu_head *rhp) bool rcu_sys_is_idle(void) { static struct rcu_sysidle_head rsh; - int rss = ACCESS_ONCE(full_sysidle_state); + int rss = READ_ONCE(full_sysidle_state); if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu)) return false; @@ -2858,7 +2834,7 @@ bool rcu_sys_is_idle(void) } rcu_sysidle_report(rcu_state_p, isidle, maxj, false); oldrss = rss; - rss = ACCESS_ONCE(full_sysidle_state); + rss = READ_ONCE(full_sysidle_state); } } @@ -2942,10 +2918,10 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp) #ifdef CONFIG_NO_HZ_FULL if (tick_nohz_full_cpu(smp_processor_id()) && (!rcu_gp_in_progress(rsp) || - ULONG_CMP_LT(jiffies, ACCESS_ONCE(rsp->gp_start) + HZ))) - return 1; + ULONG_CMP_LT(jiffies, READ_ONCE(rsp->gp_start) + HZ))) + return true; #endif /* #ifdef CONFIG_NO_HZ_FULL */ - return 0; + return false; } /* @@ -2971,7 +2947,7 @@ static void rcu_bind_gp_kthread(void) static void rcu_dynticks_task_enter(void) { #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) - ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id(); + WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id()); #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ } @@ -2979,6 +2955,6 @@ static void rcu_dynticks_task_enter(void) static void rcu_dynticks_task_exit(void) { #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) - ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1; + WRITE_ONCE(current->rcu_tasks_idle_cpu, -1); #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ }
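
/*
 * Illustrative userspace sketch, not part of the patched file: most of
 * the mechanical churn above converts ACCESS_ONCE() into READ_ONCE()
 * and WRITE_ONCE(). The program below uses simplified, scalar-only
 * stand-ins for those kernel macros (GNU C __typeof__) to show the
 * idiom: a marked store that the compiler must emit exactly once, and
 * a marked load that is refetched on every iteration rather than
 * hoisted, mirroring the ->nocb_leader_sleep handshake in
 * wake_nocb_leader() above.
 */
#include <stdio.h>

#define READ_ONCE(x)	 (*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

static int nocb_leader_sleep = 1;

int main(void)
{
	/* Wake path: marked store, cannot be elided by the compiler. */
	WRITE_ONCE(nocb_leader_sleep, 0);

	/* Poll path: marked load, re-read on each pass through the loop. */
	while (READ_ONCE(nocb_leader_sleep))
		;

	printf("leader awake\n");
	return 0;
}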