diff options
Diffstat (limited to 'kernel/rcutree.c')
| -rw-r--r-- | kernel/rcutree.c | 255 | 
1 files changed, 153 insertions, 102 deletions
| diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 068de3a93606..32618b3fe4e6 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -53,18 +53,38 @@  #include <linux/delay.h>  #include <linux/stop_machine.h>  #include <linux/random.h> +#include <linux/ftrace_event.h> +#include <linux/suspend.h>  #include "rcutree.h"  #include <trace/events/rcu.h>  #include "rcu.h" +/* + * Strings used in tracepoints need to be exported via the + * tracing system such that tools like perf and trace-cmd can + * translate the string address pointers to actual text. + */ +#define TPS(x)	tracepoint_string(x) +  /* Data structures. */  static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];  static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; -#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \ +/* + * In order to export the rcu_state name to the tracing tools, it + * needs to be added in the __tracepoint_string section. + * This requires defining a separate variable tp_<sname>_varname + * that points to the string being used, and this will allow + * the tracing userspace tools to be able to decipher the string + * address to the matching string. + */ +#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ +static char sname##_varname[] = #sname; \ +static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \ +struct rcu_state sname##_state = { \  	.level = { &sname##_state.node[0] }, \  	.call = cr, \  	.fqs_state = RCU_GP_IDLE, \ @@ -75,16 +95,13 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];  	.orphan_donetail = &sname##_state.orphan_donelist, \  	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \  	.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ -	.name = #sname, \ +	.name = sname##_varname, \  	.abbr = sabbr, \ -} - -struct rcu_state rcu_sched_state = -	RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); -DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); +}; \ +DEFINE_PER_CPU(struct rcu_data, sname##_data) -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); -DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); +RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); +RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);  static struct rcu_state *rcu_state;  LIST_HEAD(rcu_struct_flavors); @@ -178,7 +195,7 @@ void rcu_sched_qs(int cpu)  	struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);  	if (rdp->passed_quiesce == 0) -		trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); +		trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));  	rdp->passed_quiesce = 1;  } @@ -187,7 +204,7 @@ void rcu_bh_qs(int cpu)  	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);  	if (rdp->passed_quiesce == 0) -		trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); +		trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));  	rdp->passed_quiesce = 1;  } @@ -198,16 +215,20 @@ void rcu_bh_qs(int cpu)   */  void rcu_note_context_switch(int cpu)  { -	trace_rcu_utilization("Start context switch"); +	trace_rcu_utilization(TPS("Start context switch"));  	rcu_sched_qs(cpu);  	rcu_preempt_note_context_switch(cpu); -	trace_rcu_utilization("End context switch"); +	trace_rcu_utilization(TPS("End context switch"));  }  EXPORT_SYMBOL_GPL(rcu_note_context_switch);  DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {  	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,  	.dynticks = ATOMIC_INIT(1), +#ifdef CONFIG_NO_HZ_FULL_SYSIDLE +	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE, +	.dynticks_idle = ATOMIC_INIT(1), +#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */  };  static long blimit = 10;	/* Maximum callbacks per rcu_do_batch. */ @@ -226,7 +247,10 @@ module_param(jiffies_till_next_fqs, ulong, 0644);  static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,  				  struct rcu_data *rdp); -static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)); +static void force_qs_rnp(struct rcu_state *rsp, +			 int (*f)(struct rcu_data *rsp, bool *isidle, +				  unsigned long *maxj), +			 bool *isidle, unsigned long *maxj);  static void force_quiescent_state(struct rcu_state *rsp);  static int rcu_pending(int cpu); @@ -345,11 +369,11 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)  static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,  				bool user)  { -	trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting); +	trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);  	if (!user && !is_idle_task(current)) {  		struct task_struct *idle = idle_task(smp_processor_id()); -		trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); +		trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);  		ftrace_dump(DUMP_ORIG);  		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",  			  current->pid, current->comm, @@ -411,6 +435,7 @@ void rcu_idle_enter(void)  	local_irq_save(flags);  	rcu_eqs_enter(false); +	rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0);  	local_irq_restore(flags);  }  EXPORT_SYMBOL_GPL(rcu_idle_enter); @@ -428,27 +453,6 @@ void rcu_user_enter(void)  {  	rcu_eqs_enter(1);  } - -/** - * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace - * after the current irq returns. - * - * This is similar to rcu_user_enter() but in the context of a non-nesting - * irq. After this call, RCU enters into idle mode when the interrupt - * returns. - */ -void rcu_user_enter_after_irq(void) -{ -	unsigned long flags; -	struct rcu_dynticks *rdtp; - -	local_irq_save(flags); -	rdtp = &__get_cpu_var(rcu_dynticks); -	/* Ensure this irq is interrupting a non-idle RCU state.  */ -	WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK)); -	rdtp->dynticks_nesting = 1; -	local_irq_restore(flags); -}  #endif /* CONFIG_RCU_USER_QS */  /** @@ -479,9 +483,10 @@ void rcu_irq_exit(void)  	rdtp->dynticks_nesting--;  	WARN_ON_ONCE(rdtp->dynticks_nesting < 0);  	if (rdtp->dynticks_nesting) -		trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); +		trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);  	else  		rcu_eqs_enter_common(rdtp, oldval, true); +	rcu_sysidle_enter(rdtp, 1);  	local_irq_restore(flags);  } @@ -501,11 +506,11 @@ static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,  	smp_mb__after_atomic_inc();  /* See above. */  	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));  	rcu_cleanup_after_idle(smp_processor_id()); -	trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); +	trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);  	if (!user && !is_idle_task(current)) {  		struct task_struct *idle = idle_task(smp_processor_id()); -		trace_rcu_dyntick("Error on exit: not idle task", +		trace_rcu_dyntick(TPS("Error on exit: not idle task"),  				  oldval, rdtp->dynticks_nesting);  		ftrace_dump(DUMP_ORIG);  		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", @@ -550,6 +555,7 @@ void rcu_idle_exit(void)  	local_irq_save(flags);  	rcu_eqs_exit(false); +	rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0);  	local_irq_restore(flags);  }  EXPORT_SYMBOL_GPL(rcu_idle_exit); @@ -565,28 +571,6 @@ void rcu_user_exit(void)  {  	rcu_eqs_exit(1);  } - -/** - * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace - * idle mode after the current non-nesting irq returns. - * - * This is similar to rcu_user_exit() but in the context of an irq. - * This is called when the irq has interrupted a userspace RCU idle mode - * context. When the current non-nesting interrupt returns after this call, - * the CPU won't restore the RCU idle mode. - */ -void rcu_user_exit_after_irq(void) -{ -	unsigned long flags; -	struct rcu_dynticks *rdtp; - -	local_irq_save(flags); -	rdtp = &__get_cpu_var(rcu_dynticks); -	/* Ensure we are interrupting an RCU idle mode. */ -	WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK); -	rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE; -	local_irq_restore(flags); -}  #endif /* CONFIG_RCU_USER_QS */  /** @@ -620,9 +604,10 @@ void rcu_irq_enter(void)  	rdtp->dynticks_nesting++;  	WARN_ON_ONCE(rdtp->dynticks_nesting == 0);  	if (oldval) -		trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); +		trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);  	else  		rcu_eqs_exit_common(rdtp, oldval, true); +	rcu_sysidle_exit(rdtp, 1);  	local_irq_restore(flags);  } @@ -746,9 +731,11 @@ static int rcu_is_cpu_rrupt_from_idle(void)   * credit them with an implicit quiescent state.  Return 1 if this CPU   * is in dynticks idle mode, which is an extended quiescent state.   */ -static int dyntick_save_progress_counter(struct rcu_data *rdp) +static int dyntick_save_progress_counter(struct rcu_data *rdp, +					 bool *isidle, unsigned long *maxj)  {  	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); +	rcu_sysidle_check_cpu(rdp, isidle, maxj);  	return (rdp->dynticks_snap & 0x1) == 0;  } @@ -758,7 +745,8 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)   * idle state since the last call to dyntick_save_progress_counter()   * for this same CPU, or by virtue of having been offline.   */ -static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) +static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, +				    bool *isidle, unsigned long *maxj)  {  	unsigned int curr;  	unsigned int snap; @@ -775,7 +763,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)  	 * of the current RCU grace period.  	 */  	if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { -		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); +		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));  		rdp->dynticks_fqs++;  		return 1;  	} @@ -795,7 +783,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)  		return 0;  /* Grace period is not old enough. */  	barrier();  	if (cpu_is_offline(rdp->cpu)) { -		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); +		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));  		rdp->offline_fqs++;  		return 1;  	} @@ -1032,7 +1020,7 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,   * rcu_nocb_wait_gp().   */  static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, -				unsigned long c, char *s) +				unsigned long c, const char *s)  {  	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,  				      rnp->completed, c, rnp->level, @@ -1058,9 +1046,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)  	 * grace period is already marked as needed, return to the caller.  	 */  	c = rcu_cbs_completed(rdp->rsp, rnp); -	trace_rcu_future_gp(rnp, rdp, c, "Startleaf"); +	trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));  	if (rnp->need_future_gp[c & 0x1]) { -		trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf"); +		trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));  		return c;  	} @@ -1074,7 +1062,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)  	if (rnp->gpnum != rnp->completed ||  	    ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {  		rnp->need_future_gp[c & 0x1]++; -		trace_rcu_future_gp(rnp, rdp, c, "Startedleaf"); +		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));  		return c;  	} @@ -1102,7 +1090,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)  	 * recorded, trace and leave.  	 */  	if (rnp_root->need_future_gp[c & 0x1]) { -		trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot"); +		trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));  		goto unlock_out;  	} @@ -1111,9 +1099,9 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)  	/* If a grace period is not already in progress, start one. */  	if (rnp_root->gpnum != rnp_root->completed) { -		trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot"); +		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));  	} else { -		trace_rcu_future_gp(rnp, rdp, c, "Startedroot"); +		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));  		rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);  	}  unlock_out: @@ -1137,7 +1125,8 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)  	rcu_nocb_gp_cleanup(rsp, rnp);  	rnp->need_future_gp[c & 0x1] = 0;  	needmore = rnp->need_future_gp[(c + 1) & 0x1]; -	trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup"); +	trace_rcu_future_gp(rnp, rdp, c, +			    needmore ? TPS("CleanupMore") : TPS("Cleanup"));  	return needmore;  } @@ -1205,9 +1194,9 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,  	/* Trace depending on how much we were able to accelerate. */  	if (!*rdp->nxttail[RCU_WAIT_TAIL]) -		trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB"); +		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));  	else -		trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB"); +		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));  }  /* @@ -1273,7 +1262,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc  		/* Remember that we saw this grace-period completion. */  		rdp->completed = rnp->completed; -		trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); +		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));  	}  	if (rdp->gpnum != rnp->gpnum) { @@ -1283,7 +1272,7 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc  		 * go looking for one.  		 */  		rdp->gpnum = rnp->gpnum; -		trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); +		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));  		rdp->passed_quiesce = 0;  		rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);  		zero_cpu_stall_ticks(rdp); @@ -1315,6 +1304,7 @@ static int rcu_gp_init(struct rcu_state *rsp)  	struct rcu_data *rdp;  	struct rcu_node *rnp = rcu_get_root(rsp); +	rcu_bind_gp_kthread();  	raw_spin_lock_irq(&rnp->lock);  	rsp->gp_flags = 0; /* Clear all flags: New grace period. */ @@ -1326,7 +1316,7 @@ static int rcu_gp_init(struct rcu_state *rsp)  	/* Advance to a new grace period and initialize state. */  	rsp->gpnum++; -	trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); +	trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));  	record_gp_stall_check_time(rsp);  	raw_spin_unlock_irq(&rnp->lock); @@ -1379,16 +1369,25 @@ static int rcu_gp_init(struct rcu_state *rsp)  int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)  {  	int fqs_state = fqs_state_in; +	bool isidle = false; +	unsigned long maxj;  	struct rcu_node *rnp = rcu_get_root(rsp);  	rsp->n_force_qs++;  	if (fqs_state == RCU_SAVE_DYNTICK) {  		/* Collect dyntick-idle snapshots. */ -		force_qs_rnp(rsp, dyntick_save_progress_counter); +		if (is_sysidle_rcu_state(rsp)) { +			isidle = 1; +			maxj = jiffies - ULONG_MAX / 4; +		} +		force_qs_rnp(rsp, dyntick_save_progress_counter, +			     &isidle, &maxj); +		rcu_sysidle_report_gp(rsp, isidle, maxj);  		fqs_state = RCU_FORCE_QS;  	} else {  		/* Handle dyntick-idle and offline CPUs. */ -		force_qs_rnp(rsp, rcu_implicit_dynticks_qs); +		isidle = 0; +		force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);  	}  	/* Clear flag to prevent immediate re-entry. */  	if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { @@ -1448,7 +1447,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)  	rcu_nocb_gp_set(rnp, nocb);  	rsp->completed = rsp->gpnum; /* Declare grace period done. */ -	trace_rcu_grace_period(rsp->name, rsp->completed, "end"); +	trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));  	rsp->fqs_state = RCU_GP_IDLE;  	rdp = this_cpu_ptr(rsp->rda);  	rcu_advance_cbs(rsp, rnp, rdp);  /* Reduce false positives below. */ @@ -1558,10 +1557,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,  	/*  	 * We can't do wakeups while holding the rnp->lock, as that -	 * could cause possible deadlocks with the rq->lock. Deter -	 * the wakeup to interrupt context. +	 * could cause possible deadlocks with the rq->lock. Defer +	 * the wakeup to interrupt context.  And don't bother waking +	 * up the running kthread.  	 */ -	irq_work_queue(&rsp->wakeup_work); +	if (current != rsp->gp_kthread) +		irq_work_queue(&rsp->wakeup_work);  }  /* @@ -1857,7 +1858,7 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)  	RCU_TRACE(mask = rdp->grpmask);  	trace_rcu_grace_period(rsp->name,  			       rnp->gpnum + 1 - !!(rnp->qsmask & mask), -			       "cpuofl"); +			       TPS("cpuofl"));  }  /* @@ -2044,7 +2045,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)   */  void rcu_check_callbacks(int cpu, int user)  { -	trace_rcu_utilization("Start scheduler-tick"); +	trace_rcu_utilization(TPS("Start scheduler-tick"));  	increment_cpu_stall_ticks();  	if (user || rcu_is_cpu_rrupt_from_idle()) { @@ -2077,7 +2078,7 @@ void rcu_check_callbacks(int cpu, int user)  	rcu_preempt_check_callbacks(cpu);  	if (rcu_pending(cpu))  		invoke_rcu_core(); -	trace_rcu_utilization("End scheduler-tick"); +	trace_rcu_utilization(TPS("End scheduler-tick"));  }  /* @@ -2087,7 +2088,10 @@ void rcu_check_callbacks(int cpu, int user)   *   * The caller must have suppressed start of new grace periods.   */ -static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) +static void force_qs_rnp(struct rcu_state *rsp, +			 int (*f)(struct rcu_data *rsp, bool *isidle, +				  unsigned long *maxj), +			 bool *isidle, unsigned long *maxj)  {  	unsigned long bit;  	int cpu; @@ -2110,9 +2114,12 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))  		cpu = rnp->grplo;  		bit = 1;  		for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { -			if ((rnp->qsmask & bit) != 0 && -			    f(per_cpu_ptr(rsp->rda, cpu))) -				mask |= bit; +			if ((rnp->qsmask & bit) != 0) { +				if ((rnp->qsmaskinit & bit) != 0) +					*isidle = 0; +				if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) +					mask |= bit; +			}  		}  		if (mask != 0) { @@ -2208,10 +2215,10 @@ static void rcu_process_callbacks(struct softirq_action *unused)  	if (cpu_is_offline(smp_processor_id()))  		return; -	trace_rcu_utilization("Start RCU core"); +	trace_rcu_utilization(TPS("Start RCU core"));  	for_each_rcu_flavor(rsp)  		__rcu_process_callbacks(rsp); -	trace_rcu_utilization("End RCU core"); +	trace_rcu_utilization(TPS("End RCU core"));  }  /* @@ -2287,6 +2294,13 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,  }  /* + * RCU callback function to leak a callback. + */ +static void rcu_leak_callback(struct rcu_head *rhp) +{ +} + +/*   * Helper function for call_rcu() and friends.  The cpu argument will   * normally be -1, indicating "currently running CPU".  It may specify   * a CPU only if that CPU is a no-CBs CPU.  Currently, only _rcu_barrier() @@ -2300,7 +2314,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),  	struct rcu_data *rdp;  	WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */ -	debug_rcu_head_queue(head); +	if (debug_rcu_head_queue(head)) { +		/* Probable double call_rcu(), so leak the callback. */ +		ACCESS_ONCE(head->func) = rcu_leak_callback; +		WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n"); +		return; +	}  	head->func = func;  	head->next = NULL; @@ -2720,7 +2739,7 @@ static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)   * Helper function for _rcu_barrier() tracing.  If tracing is disabled,   * the compiler is expected to optimize this away.   */ -static void _rcu_barrier_trace(struct rcu_state *rsp, char *s, +static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s,  			       int cpu, unsigned long done)  {  	trace_rcu_barrier(rsp->name, s, cpu, @@ -2785,9 +2804,20 @@ static void _rcu_barrier(struct rcu_state *rsp)  	 * transition.  The "if" expression below therefore rounds the old  	 * value up to the next even number and adds two before comparing.  	 */ -	snap_done = ACCESS_ONCE(rsp->n_barrier_done); +	snap_done = rsp->n_barrier_done;  	_rcu_barrier_trace(rsp, "Check", -1, snap_done); -	if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { + +	/* +	 * If the value in snap is odd, we needed to wait for the current +	 * rcu_barrier() to complete, then wait for the next one, in other +	 * words, we need the value of snap_done to be three larger than +	 * the value of snap.  On the other hand, if the value in snap is +	 * even, we only had to wait for the next rcu_barrier() to complete, +	 * in other words, we need the value of snap_done to be only two +	 * greater than the value of snap.  The "(snap + 3) & ~0x1" computes +	 * this for us (thank you, Linus!). +	 */ +	if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) {  		_rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);  		smp_mb(); /* caller's subsequent code after above check. */  		mutex_unlock(&rsp->barrier_mutex); @@ -2930,6 +2960,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)  	rdp->blimit = blimit;  	init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */  	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; +	rcu_sysidle_init_percpu_data(rdp->dynticks);  	atomic_set(&rdp->dynticks->dynticks,  		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);  	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */ @@ -2952,7 +2983,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)  			rdp->completed = rnp->completed;  			rdp->passed_quiesce = 0;  			rdp->qs_pending = 0; -			trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); +			trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));  		}  		raw_spin_unlock(&rnp->lock); /* irqs already disabled. */  		rnp = rnp->parent; @@ -2982,7 +3013,7 @@ static int rcu_cpu_notify(struct notifier_block *self,  	struct rcu_node *rnp = rdp->mynode;  	struct rcu_state *rsp; -	trace_rcu_utilization("Start CPU hotplug"); +	trace_rcu_utilization(TPS("Start CPU hotplug"));  	switch (action) {  	case CPU_UP_PREPARE:  	case CPU_UP_PREPARE_FROZEN: @@ -3011,7 +3042,26 @@ static int rcu_cpu_notify(struct notifier_block *self,  	default:  		break;  	} -	trace_rcu_utilization("End CPU hotplug"); +	trace_rcu_utilization(TPS("End CPU hotplug")); +	return NOTIFY_OK; +} + +static int rcu_pm_notify(struct notifier_block *self, +			 unsigned long action, void *hcpu) +{ +	switch (action) { +	case PM_HIBERNATION_PREPARE: +	case PM_SUSPEND_PREPARE: +		if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ +			rcu_expedited = 1; +		break; +	case PM_POST_HIBERNATION: +	case PM_POST_SUSPEND: +		rcu_expedited = 0; +		break; +	default: +		break; +	}  	return NOTIFY_OK;  } @@ -3256,6 +3306,7 @@ void __init rcu_init(void)  	 * or the scheduler are operational.  	 */  	cpu_notifier(rcu_cpu_notify, 0); +	pm_notifier(rcu_pm_notify, 0);  	for_each_online_cpu(cpu)  		rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);  } | 
