Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into cputime-tip

Conflicts: drivers/cpufreq/cpufreq_conservative.c drivers/cpufreq/cpufreq_ondemand.c drivers/macintosh/rack-meter.c fs/proc/stat.c fs/proc/uptime.c kernel/sched/core.c
2011-12-19 19:23:15 +01:00 · 2011-12-19 19:23:15 +01:00 · 612ef28a04
parent c3e0ef9a29 07cde2608a
commit 612ef28a04
29 changed files with 2608 additions and 2375 deletions
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data)
 	j = 0;
 	for_each_online_cpu(i) {
 		os_data->os_cpu[j].per_cpu_user =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.user);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
 		os_data->os_cpu[j].per_cpu_nice =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.nice);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
 		os_data->os_cpu[j].per_cpu_system =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.system);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
 		os_data->os_cpu[j].per_cpu_idle =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.idle);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
 		os_data->os_cpu[j].per_cpu_irq =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.irq);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
 		os_data->os_cpu[j].per_cpu_softirq =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.softirq);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
 		os_data->os_cpu[j].per_cpu_iowait =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.iowait);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
 		os_data->os_cpu[j].per_cpu_steal =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.steal);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
 		os_data->os_cpu[j].cpu_id = i;
 		j++;
 	}
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@ -218,7 +218,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #ifdef CONFIG_SMP
 #define safe_address (__per_cpu_offset[0])
 #else
-#define safe_address (kstat_cpu(0).cpustat.user)
+#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
 #endif
 /*
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@ -95,26 +95,26 @@ static struct dbs_tuners {
 	.freq_step = 5,
 };
-static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
+static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 							cputime64_t *wall)
 {
-	cputime64_t idle_time;
+	u64 idle_time;
-	cputime64_t cur_wall_time;
+	u64 cur_wall_time;
-	cputime64_t busy_time;
+	u64 busy_time;
 	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-	busy_time  = kstat_cpu(cpu).cpustat.user;
+
-	busy_time += kstat_cpu(cpu).cpustat.system;
+	busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
-	busy_time += kstat_cpu(cpu).cpustat.irq;
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
-	busy_time += kstat_cpu(cpu).cpustat.softirq;
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
-	busy_time += kstat_cpu(cpu).cpustat.steal;
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
-	busy_time += kstat_cpu(cpu).cpustat.nice;
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
 	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 	idle_time = cur_wall_time - busy_time;
 	if (wall)
-		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+		*wall = jiffies_to_usecs(cur_wall_time);
-	return (cputime64_t)jiffies_to_usecs(idle_time);
+	return jiffies_to_usecs(idle_time);
 }
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@ -271,7 +271,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
 		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&dbs_info->prev_cpu_wall);
 		if (dbs_tuners_ins.ignore_nice)
-			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 	}
 	return count;
 }
@ -361,11 +361,11 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 		j_dbs_info->prev_cpu_idle = cur_idle_time;
 		if (dbs_tuners_ins.ignore_nice) {
-			cputime64_t cur_nice;
+			u64 cur_nice;
 			unsigned long cur_nice_jiffies;
-			cur_nice = kstat_cpu(j).cpustat.nice -
+			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
-					j_dbs_info->prev_cpu_nice;
+					 j_dbs_info->prev_cpu_nice;
 			/*
 			 * Assumption: nice time between sampling periods will
 			 * be less than 2^32 jiffies for 32 bit sys
@ -373,7 +373,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 			cur_nice_jiffies = (unsigned long)
 					cputime64_to_jiffies64(cur_nice);
-			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
@ -500,10 +500,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&j_dbs_info->prev_cpu_wall);
-			if (dbs_tuners_ins.ignore_nice) {
+			if (dbs_tuners_ins.ignore_nice)
 				j_dbs_info->prev_cpu_nice =
-						kstat_cpu(j).cpustat.nice;
+						kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 			}
 		}
 		this_dbs_info->down_skip = 0;
 		this_dbs_info->requested_freq = policy->cur;
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@ -119,26 +119,26 @@ static struct dbs_tuners {
 	.powersave_bias = 0,
 };
-static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
+static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 							cputime64_t *wall)
 {
-	cputime64_t idle_time;
+	u64 idle_time;
-	cputime64_t cur_wall_time;
+	u64 cur_wall_time;
-	cputime64_t busy_time;
+	u64 busy_time;
 	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-	busy_time  = kstat_cpu(cpu).cpustat.user;
+
-	busy_time += kstat_cpu(cpu).cpustat.system;
+	busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
-	busy_time += kstat_cpu(cpu).cpustat.irq;
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
-	busy_time += kstat_cpu(cpu).cpustat.softirq;
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
-	busy_time += kstat_cpu(cpu).cpustat.steal;
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
-	busy_time += kstat_cpu(cpu).cpustat.nice;
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
 	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 	idle_time = cur_wall_time - busy_time;
 	if (wall)
-		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+		*wall = jiffies_to_usecs(cur_wall_time);
-	return (cputime64_t)jiffies_to_usecs(idle_time);
+	return jiffies_to_usecs(idle_time);
 }
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@ -344,7 +344,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
 		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&dbs_info->prev_cpu_wall);
 		if (dbs_tuners_ins.ignore_nice)
-			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 	}
 	return count;
@ -454,11 +454,11 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 		j_dbs_info->prev_cpu_iowait = cur_iowait_time;
 		if (dbs_tuners_ins.ignore_nice) {
-			cputime64_t cur_nice;
+			u64 cur_nice;
 			unsigned long cur_nice_jiffies;
-			cur_nice = kstat_cpu(j).cpustat.nice -
+			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
-					j_dbs_info->prev_cpu_nice;
+					 j_dbs_info->prev_cpu_nice;
 			/*
 			 * Assumption: nice time between sampling periods will
 			 * be less than 2^32 jiffies for 32 bit sys
@ -466,7 +466,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 			cur_nice_jiffies = (unsigned long)
 					cputime64_to_jiffies64(cur_nice);
-			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
@ -645,10 +645,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&j_dbs_info->prev_cpu_wall);
-			if (dbs_tuners_ins.ignore_nice) {
+			if (dbs_tuners_ins.ignore_nice)
 				j_dbs_info->prev_cpu_nice =
-						kstat_cpu(j).cpustat.nice;
+						kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 			}
 		}
 		this_dbs_info->cpu = cpu;
 		this_dbs_info->rate_mult = 1;
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@ -81,12 +81,13 @@ static int rackmeter_ignore_nice;
 */
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
 {
-	cputime64_t retval;
+	u64 retval;
-	retval = kstat_cpu(cpu).cpustat.idle + kstat_cpu(cpu).cpustat.iowait;
+	retval = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE] +
 		 kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
 	if (rackmeter_ignore_nice)
-		retval += kstat_cpu(cpu).cpustat.nice;
+		retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 	return retval;
 }
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@ -22,14 +22,13 @@
 #define arch_idle_time(cpu) 0
 #endif
-static cputime64_t get_idle_time(int cpu)
+static u64 get_idle_time(int cpu)
 {
-	u64 idle_time = get_cpu_idle_time_us(cpu, NULL);
+	u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
 	cputime64_t idle;
 	if (idle_time == -1ULL) {
 		/* !NO_HZ so we can rely on cpustat.idle */
-		idle = kstat_cpu(cpu).cpustat.idle;
+		idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
 		idle += arch_idle_time(cpu);
 	} else
 		idle = nsecs_to_jiffies64(1000 * idle_time);
@ -37,14 +36,13 @@ static cputime64_t get_idle_time(int cpu)
 	return idle;
 }
-static cputime64_t get_iowait_time(int cpu)
+static u64 get_iowait_time(int cpu)
 {
-	u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL);
+	u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
 	cputime64_t iowait;
 	if (iowait_time == -1ULL)
 		/* !NO_HZ so we can rely on cpustat.iowait */
-		iowait = kstat_cpu(cpu).cpustat.iowait;
+		iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
 	else
 		iowait = nsecs_to_jiffies64(1000 * iowait_time);
@ -55,8 +53,8 @@ static int show_stat(struct seq_file *p, void *v)
 {
 	int i, j;
 	unsigned long jif;
-	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
+	u64 user, nice, system, idle, iowait, irq, softirq, steal;
-	cputime64_t guest, guest_nice;
+	u64 guest, guest_nice;
 	u64 sum = 0;
 	u64 sum_softirq = 0;
 	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
@ -69,18 +67,16 @@ static int show_stat(struct seq_file *p, void *v)
 	jif = boottime.tv_sec;
 	for_each_possible_cpu(i) {
-		user += kstat_cpu(i).cpustat.user;
+		user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
-		nice += kstat_cpu(i).cpustat.nice;
+		nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
-		system += kstat_cpu(i).cpustat.system;
+		system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
 		idle += get_idle_time(i);
 		iowait += get_iowait_time(i);
-		irq += kstat_cpu(i).cpustat.irq;
+		irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
-		softirq += kstat_cpu(i).cpustat.softirq;
+		softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
-		steal += kstat_cpu(i).cpustat.steal;
+		steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
-		guest += kstat_cpu(i).cpustat.guest;
+		guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
-		guest_nice += kstat_cpu(i).cpustat.guest_nice;
+		guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
 		sum += kstat_cpu_irqs_sum(i);
 		sum += arch_irq_stat_cpu(i);
 		for (j = 0; j < NR_SOFTIRQS; j++) {
 			unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
@ -105,16 +101,16 @@ static int show_stat(struct seq_file *p, void *v)
 		(unsigned long long)cputime64_to_clock_t(guest_nice));
 	for_each_online_cpu(i) {
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
-		user = kstat_cpu(i).cpustat.user;
+		user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
-		nice = kstat_cpu(i).cpustat.nice;
+		nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
-		system = kstat_cpu(i).cpustat.system;
+		system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
 		idle = get_idle_time(i);
 		iowait = get_iowait_time(i);
-		irq = kstat_cpu(i).cpustat.irq;
+		irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
-		softirq = kstat_cpu(i).cpustat.softirq;
+		softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
-		steal = kstat_cpu(i).cpustat.steal;
+		steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
-		guest = kstat_cpu(i).cpustat.guest;
+		guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
-		guest_nice = kstat_cpu(i).cpustat.guest_nice;
+		guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
 		seq_printf(p,
 			"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
 			"%llu\n",
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@ -11,14 +11,14 @@ static int uptime_proc_show(struct seq_file *m, void *v)
 {
 	struct timespec uptime;
 	struct timespec idle;
-	cputime64_t idletime;
+	u64 idletime;
 	u64 nsec;
 	u32 rem;
 	int i;
 	idletime = 0;
 	for_each_possible_cpu(i)
-		idletime += kstat_cpu(i).cpustat.idle;
+		idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
 	do_posix_clock_monotonic_gettime(&uptime);
 	monotonic_to_bootbased(&uptime);
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@ -6,6 +6,7 @@
 #include <linux/percpu.h>
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <asm/irq.h>
 #include <asm/cputime.h>
@ -15,21 +16,25 @@
 * used by rstatd/perfmeter
 */
-struct cpu_usage_stat {
+enum cpu_usage_stat {
-	cputime64_t user;
+	CPUTIME_USER,
-	cputime64_t nice;
+	CPUTIME_NICE,
-	cputime64_t system;
+	CPUTIME_SYSTEM,
-	cputime64_t softirq;
+	CPUTIME_SOFTIRQ,
-	cputime64_t irq;
+	CPUTIME_IRQ,
-	cputime64_t idle;
+	CPUTIME_IDLE,
-	cputime64_t iowait;
+	CPUTIME_IOWAIT,
-	cputime64_t steal;
+	CPUTIME_STEAL,
-	cputime64_t guest;
+	CPUTIME_GUEST,
-	cputime64_t guest_nice;
+	CPUTIME_GUEST_NICE,
 	NR_STATS,
 };
 struct kernel_cpustat {
 	u64 cpustat[NR_STATS];
 };
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
 #ifndef CONFIG_GENERIC_HARDIRQS
       unsigned int irqs[NR_IRQS];
 #endif
@ -38,10 +43,13 @@ struct kernel_stat {
 };
 DECLARE_PER_CPU(struct kernel_stat, kstat);
 DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
 #define kstat_cpu(cpu)	per_cpu(kstat, cpu)
 /* Must have preemption disabled for this to be meaningful. */
-#define kstat_this_cpu	__get_cpu_var(kstat)
+#define kstat_this_cpu (&__get_cpu_var(kstat))
 #define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat))
 #define kstat_cpu(cpu) per_cpu(kstat, cpu)
 #define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
 extern unsigned long long nr_context_switches(void);
--- a/include/linux/latencytop.h
+++ b/include/linux/latencytop.h
@ -10,6 +10,8 @@
 #define _INCLUDE_GUARD_LATENCYTOP_H_
 #include <linux/compiler.h>
 struct task_struct;
 #ifdef CONFIG_LATENCYTOP
 #define LT_SAVECOUNT		32
@ -23,7 +25,6 @@ struct latency_record {
 };
 struct task_struct;
 extern int latencytop_enabled;
 void __account_scheduler_latency(struct task_struct *task, int usecs, int inter);
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@ -273,9 +273,11 @@ extern int runqueue_is_locked(int cpu);
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern void select_nohz_load_balancer(int stop_tick);
 extern void set_cpu_sd_state_idle(void);
 extern int get_nohz_timer_target(void);
 #else
 static inline void select_nohz_load_balancer(int stop_tick) { }
 static inline void set_cpu_sd_state_idle(void) { }
 #endif
 /*
@ -901,6 +903,10 @@ struct sched_group_power {
 	 * single CPU.
 	 */
 	unsigned int power, power_orig;
 	/*
 	 * Number of busy cpus in this group.
 	 */
 	atomic_t nr_busy_cpus;
 };
 struct sched_group {
@ -925,6 +931,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
 	return to_cpumask(sg->cpumask);
 }
 /**
 * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
 * @group: The group whose first cpu is to be returned.
 */
 static inline unsigned int group_first_cpu(struct sched_group *group)
 {
 	return cpumask_first(sched_group_cpus(group));
 }
 struct sched_domain_attr {
 	int relax_domain_level;
 };
@ -1315,8 +1330,8 @@ struct task_struct {
 	 * older sibling, respectively.  (p->father can be replaced with 
 	 * p->real_parent->pid)
 	 */
-	struct task_struct *real_parent; /* real parent process */
+	struct task_struct __rcu *real_parent; /* real parent process */
-	struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
+	struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
 	/*
 	 * children/sibling forms the list of my natural children
 	 */
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@ -330,6 +330,13 @@ DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
 	     TP_ARGS(tsk, delay));
 /*
 * Tracepoint for accounting blocked time (time the task is in uninterruptible).
 */
 DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
 	     TP_ARGS(tsk, delay));
 /*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
--- a/kernel/Makefile
+++ b/kernel/Makefile
@ -2,16 +2,15 @@
 # Makefile for the linux kernel.
 #
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
+obj-y     = fork.o exec_domain.o panic.o printk.o \
 	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
-	    notifier.o ksysfs.o sched_clock.o cred.o \
+	    notifier.o ksysfs.o cred.o \
-	    async.o range.o
+	    async.o range.o groups.o
 obj-y += groups.o
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace debug files and internal ftrace files
@ -20,10 +19,11 @@ CFLAGS_REMOVE_lockdep_proc.o = -pg
 CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_irq_work.o = -pg
 endif
 obj-y += sched/
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
@ -99,7 +99,6 @@ obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
@ -110,15 +109,6 @@ obj-$(CONFIG_PADATA) += padata.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
 # needed for x86 only.  Why this used to be enabled for all architectures is beyond
 # me.  I suspect most platforms don't need this, but until we know that for sure
 # I turn this off for IA-64 only.  Andreas Schwab says it's also needed on m68k
 # to get a correct value for the wait-channel (WCHAN in ps). --davidm
 CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
 endif
 $(obj)/configs.o: $(obj)/config_data.h
 # config_data.h contains the same information as ikconfig.h but gzipped.
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@ -0,0 +1,20 @@
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_clock.o = -pg
 endif
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
 # needed for x86 only.  Why this used to be enabled for all architectures is beyond
 # me.  I suspect most platforms don't need this, but until we know that for sure
 # I turn this off for IA-64 only.  Andreas Schwab says it's also needed on m68k
 # to get a correct value for the wait-channel (WCHAN in ps). --davidm
 CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
 endif
 obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o
 obj-$(CONFIG_SMP) += cpupri.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@ -1,15 +1,19 @@
 #ifdef CONFIG_SCHED_AUTOGROUP
 #include "sched.h"
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
 #include <linux/security.h>
 #include <linux/export.h>
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
 static struct autogroup autogroup_default;
 static atomic_t autogroup_seq_nr;
-static void __init autogroup_init(struct task_struct *init_task)
+void __init autogroup_init(struct task_struct *init_task)
 {
 	autogroup_default.tg = &root_task_group;
 	kref_init(&autogroup_default.kref);
@ -17,7 +21,7 @@ static void __init autogroup_init(struct task_struct *init_task)
 	init_task->signal->autogroup = &autogroup_default;
 }
-static inline void autogroup_free(struct task_group *tg)
+void autogroup_free(struct task_group *tg)
 {
 	kfree(tg->autogroup);
 }
@ -59,10 +63,6 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p)
 	return ag;
 }
 #ifdef CONFIG_RT_GROUP_SCHED
 static void free_rt_sched_group(struct task_group *tg);
 #endif
 static inline struct autogroup *autogroup_create(void)
 {
 	struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
@ -108,8 +108,7 @@ out_fail:
 	return autogroup_kref_get(&autogroup_default);
 }
-static inline bool
+bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 {
 	if (tg != &root_task_group)
 		return false;
@ -127,22 +126,6 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 	return true;
 }
 static inline bool task_group_is_autogroup(struct task_group *tg)
 {
 	return !!tg->autogroup;
 }
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg)
 {
 	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
 	if (enabled && task_wants_autogroup(p, tg))
 		return p->signal->autogroup->tg;
 	return tg;
 }
 static void
 autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 {
@ -263,7 +246,7 @@ out:
 #endif /* CONFIG_PROC_FS */
 #ifdef CONFIG_SCHED_DEBUG
-static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+int autogroup_path(struct task_group *tg, char *buf, int buflen)
 {
 	if (!task_group_is_autogroup(tg))
 		return 0;
--- a/kernel/sched/auto_group.h
+++ b/kernel/sched/auto_group.h
@ -1,5 +1,8 @@
 #ifdef CONFIG_SCHED_AUTOGROUP
 #include <linux/kref.h>
 #include <linux/rwsem.h>
 struct autogroup {
 	/*
 	 * reference doesn't mean how many thread attach to this
@ -13,9 +16,28 @@ struct autogroup {
 	int			nice;
 };
-static inline bool task_group_is_autogroup(struct task_group *tg);
+extern void autogroup_init(struct task_struct *init_task);
 extern void autogroup_free(struct task_group *tg);
 /*
 * Report whether @tg has an autogroup attached, i.e. whether its
 * ->autogroup pointer is non-NULL.
 */
 static inline bool task_group_is_autogroup(struct task_group *tg)
 {
 	return !!tg->autogroup;
 }
 extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
 static inline struct task_group *
-autogroup_task_group(struct task_struct *p, struct task_group *tg);
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
 {
 	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
 	if (enabled && task_wants_autogroup(p, tg))
 		return p->signal->autogroup->tg;
 	return tg;
 }
 extern int autogroup_path(struct task_group *tg, char *buf, int buflen);
 #else /* !CONFIG_SCHED_AUTOGROUP */
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@ -1,5 +1,5 @@
 /*
- *  kernel/sched_cpupri.c
+ *  kernel/sched/cpupri.c
 *
 *  CPU priority management
 *
@ -28,7 +28,7 @@
 */
 #include <linux/gfp.h>
-#include "sched_cpupri.h"
+#include "cpupri.h"
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
 static int convert_prio(int prio)
--- a/kernel/sched/cpupri.h
+++ b/kernel/sched/cpupri.h
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@ -1,5 +1,5 @@
 /*
- * kernel/time/sched_debug.c
+ * kernel/sched/debug.c
 *
 * Print the CFS rbtree
 *
@ -16,6 +16,8 @@
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
 #include "sched.h"
 static DEFINE_SPINLOCK(sched_debug_lock);
 /*
@ -373,7 +375,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	return 0;
 }
-static void sysrq_sched_debug_show(void)
+void sysrq_sched_debug_show(void)
 {
 	sched_debug_show(NULL, NULL);
 }
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@ -3,13 +3,13 @@
 * them to run sooner, but does not allow tons of sleepers to
 * rip the spread apart.
 */
-SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1)
+SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
 /*
 * Place new tasks ahead so that they do not starve already running
 * tasks
 */
-SCHED_FEAT(START_DEBIT, 1)
+SCHED_FEAT(START_DEBIT, true)
 /*
 * Based on load and program behaviour, see if it makes sense to place
@ -17,54 +17,54 @@ SCHED_FEAT(START_DEBIT, 1)
 * improve cache locality. Typically used with SYNC wakeups as
 * generated by pipes and the like, see also SYNC_WAKEUPS.
 */
-SCHED_FEAT(AFFINE_WAKEUPS, 1)
+SCHED_FEAT(AFFINE_WAKEUPS, true)
 /*
 * Prefer to schedule the task we woke last (assuming it failed
 * wakeup-preemption), since its likely going to consume data we
 * touched, increases cache locality.
 */
-SCHED_FEAT(NEXT_BUDDY, 0)
+SCHED_FEAT(NEXT_BUDDY, false)
 /*
 * Prefer to schedule the task that ran last (when we did
 * wake-preempt) as that likely will touch the same data, increases
 * cache locality.
 */
-SCHED_FEAT(LAST_BUDDY, 1)
+SCHED_FEAT(LAST_BUDDY, true)
 /*
 * Consider buddies to be cache hot, decreases the likelihood of a
 * cache buddy being migrated away, increases cache locality.
 */
-SCHED_FEAT(CACHE_HOT_BUDDY, 1)
+SCHED_FEAT(CACHE_HOT_BUDDY, true)
 /*
 * Use arch dependent cpu power functions
 */
-SCHED_FEAT(ARCH_POWER, 0)
+SCHED_FEAT(ARCH_POWER, false)
-SCHED_FEAT(HRTICK, 0)
+SCHED_FEAT(HRTICK, false)
-SCHED_FEAT(DOUBLE_TICK, 0)
+SCHED_FEAT(DOUBLE_TICK, false)
-SCHED_FEAT(LB_BIAS, 1)
+SCHED_FEAT(LB_BIAS, true)
 /*
 * Spin-wait on mutex acquisition when the mutex owner is running on
 * another cpu -- assumes that when the owner is running, it will soon
 * release the lock. Decreases scheduling overhead.
 */
-SCHED_FEAT(OWNER_SPIN, 1)
+SCHED_FEAT(OWNER_SPIN, true)
 /*
 * Decrement CPU power based on time not spent running tasks
 */
-SCHED_FEAT(NONTASK_POWER, 1)
+SCHED_FEAT(NONTASK_POWER, true)
 /*
 * Queue remote wakeups on the target CPU and process them
 * using the scheduler IPI. Reduces rq->lock contention/bounces.
 */
-SCHED_FEAT(TTWU_QUEUE, 1)
+SCHED_FEAT(TTWU_QUEUE, true)
-SCHED_FEAT(FORCE_SD_OVERLAP, 0)
+SCHED_FEAT(FORCE_SD_OVERLAP, false)
-SCHED_FEAT(RT_RUNTIME_SHARE, 1)
+SCHED_FEAT(RT_RUNTIME_SHARE, true)
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@ -1,3 +1,5 @@
 #include "sched.h"
 /*
 * idle-task scheduling class.
 *
@ -71,7 +73,7 @@ static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task
 /*
 * Simple, special scheduling class for the per-CPU idle tasks:
 */
-static const struct sched_class idle_sched_class = {
+const struct sched_class idle_sched_class = {
 	/* .next is NULL */
 	/* no enqueue/yield_task for idle tasks */
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@ -3,7 +3,92 @@
 * policies)
 */
 #include "sched.h"
 #include <linux/slab.h>
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 struct rt_bandwidth def_rt_bandwidth;
 /*
 * hrtimer callback for the rt_period_timer embedded in a struct rt_bandwidth.
 *
 * Repeatedly forwards the timer by rt_b->rt_period and passes every non-zero
 * hrtimer_forward() result to do_sched_rt_period_timer(). The loop ends on
 * the first zero result.
 *
 * Returns HRTIMER_NORESTART when the most recent do_sched_rt_period_timer()
 * call returned non-zero; otherwise (including when it was never called)
 * returns HRTIMER_RESTART.
 */
 static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
 {
 	/* Recover the owning rt_bandwidth from its embedded timer member. */
 	struct rt_bandwidth *rt_b =
 		container_of(timer, struct rt_bandwidth, rt_period_timer);
 	ktime_t now;
 	int overrun;
 	int idle = 0;
 	for (;;) {
 		now = hrtimer_cb_get_time(timer);
 		overrun = hrtimer_forward(timer, now, rt_b->rt_period);
 		/* Nothing (more) to account for: stop forwarding. */
 		if (!overrun)
 			break;
 		idle = do_sched_rt_period_timer(rt_b, overrun);
 	}
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
 }
 /*
 * Initialise an rt_bandwidth structure.
 *
 * @rt_b:    structure to initialise
 * @period:  period length; converted with ns_to_ktime() before being stored
 * @runtime: stored unchanged in rt_b->rt_runtime
 *
 * Also initialises rt_runtime_lock and sets up rt_period_timer as a
 * CLOCK_MONOTONIC, relative-mode hrtimer whose callback is
 * sched_rt_period_timer(). The timer is only initialised here, not started.
 */
 void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 {
 	rt_b->rt_period = ns_to_ktime(period);
 	rt_b->rt_runtime = runtime;
 	raw_spin_lock_init(&rt_b->rt_runtime_lock);
 	hrtimer_init(&rt_b->rt_period_timer,
 			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 /*
 * Start the period timer of @rt_b if it is needed and not already running.
 *
 * Does nothing when rt_bandwidth_enabled() is false, when the runtime is
 * RUNTIME_INF, or when the timer is already active. Otherwise the timer is
 * started through start_bandwidth_timer() while holding rt_runtime_lock.
 */
 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return;
 	/* This check is made before rt_runtime_lock is taken. */
 	if (hrtimer_active(&rt_b->rt_period_timer))
 		return;
 	raw_spin_lock(&rt_b->rt_runtime_lock);
 	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 /*
 * Bring an rt_rq into its initial, empty state.
 *
 * Every one of the MAX_RT_PRIO priority queues in rt_rq->active is set to an
 * empty list with its bitmap bit cleared, and bit MAX_RT_PRIO is set as the
 * search delimiter. The time, throttled and runtime fields are zeroed and
 * rt_runtime_lock is initialised. With CONFIG_SMP the highest_prio,
 * migration/overload and pushable_tasks fields are reset as well.
 *
 * @rq is not referenced in the body shown here.
 */
 void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 {
 	struct rt_prio_array *array;
 	int i;
 	array = &rt_rq->active;
 	for (i = 0; i < MAX_RT_PRIO; i++) {
 		INIT_LIST_HEAD(array->queue + i);
 		__clear_bit(i, array->bitmap);
 	}
 	/* delimiter for bitsearch: */
 	__set_bit(MAX_RT_PRIO, array->bitmap);
 #if defined CONFIG_SMP
 	/* MAX_RT_PRIO is the same index used as the bitmap delimiter above. */
 	rt_rq->highest_prio.curr = MAX_RT_PRIO;
 	rt_rq->highest_prio.next = MAX_RT_PRIO;
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
 	plist_head_init(&rt_rq->pushable_tasks);
 #endif
 	rt_rq->rt_time = 0;
 	rt_rq->rt_throttled = 0;
 	rt_rq->rt_runtime = 0;
 	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
 }
 #ifdef CONFIG_RT_GROUP_SCHED
 /*
 * Tear down @rt_b by cancelling its period timer. No other member is touched.
 */
 static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	hrtimer_cancel(&rt_b->rt_period_timer);
 }
 #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
@ -25,6 +110,91 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 	return rt_se->rt_rq;
 }
 /*
 * Release the RT scheduling state of a task group.
 *
 * The bandwidth timer is cancelled only when tg->rt_se is non-NULL. For
 * each possible CPU, the rt_rq and rt_se entries are freed (each guarded
 * by a NULL check of its pointer array), and finally the two pointer
 * arrays themselves are freed. Tolerates a partially set-up group in
 * which either array is still NULL.
 */
 void free_rt_sched_group(struct task_group *tg)
 {
 	int i;
 	if (tg->rt_se)
 		destroy_rt_bandwidth(&tg->rt_bandwidth);
 	for_each_possible_cpu(i) {
 		if (tg->rt_rq)
 			kfree(tg->rt_rq[i]);
 		if (tg->rt_se)
 			kfree(tg->rt_se[i]);
 	}
 	kfree(tg->rt_rq);
 	kfree(tg->rt_se);
 }
 /*
 * Attach a per-CPU rt_rq / rt_se pair to a task group.
 *
 * @tg:     owning task group
 * @rt_rq:  runqueue to install in tg->rt_rq[@cpu]
 * @rt_se:  entity to install in tg->rt_se[@cpu]; may be NULL
 * @cpu:    CPU whose slots are filled
 * @parent: parent entity, or NULL
 *
 * The rt_rq is linked to cpu_rq(@cpu) and to @tg, and both per-CPU slots
 * are recorded. When @rt_se is NULL nothing further is done. Otherwise the
 * entity is set to queue on the CPU's own rq->rt when @parent is NULL, or
 * on parent->my_q when it is not, and is made to own @rt_rq via ->my_q.
 */
 void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
 		struct sched_rt_entity *rt_se, int cpu,
 		struct sched_rt_entity *parent)
 {
 	struct rq *rq = cpu_rq(cpu);
 	rt_rq->highest_prio.curr = MAX_RT_PRIO;
 	rt_rq->rt_nr_boosted = 0;
 	rt_rq->rq = rq;
 	rt_rq->tg = tg;
 	tg->rt_rq[cpu] = rt_rq;
 	tg->rt_se[cpu] = rt_se;
 	/* No entity supplied: only the runqueue side is set up. */
 	if (!rt_se)
 		return;
 	if (!parent)
 		rt_se->rt_rq = &rq->rt;
 	else
 		rt_se->rt_rq = parent->my_q;
 	rt_se->my_q = rt_rq;
 	rt_se->parent = parent;
 	INIT_LIST_HEAD(&rt_se->run_list);
 }
 /*
 * Allocate and initialise the per-CPU RT runqueues and entities of @tg.
 *
 * @tg:     group being set up
 * @parent: parent group; parent->rt_se[] is dereferenced for every possible
 *          CPU, so it must be non-NULL with a populated rt_se array
 *
 * The group's bandwidth is initialised with the period of def_rt_bandwidth
 * and a runtime of 0.
 *
 * Returns 1 on success and 0 on allocation failure. On failure only the
 * rt_rq of the failing iteration is freed here; everything already stored
 * in @tg is left in place.
 * NOTE(review): presumably the caller releases that through
 * free_rt_sched_group() - confirm.
 */
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct rt_rq *rt_rq;
 	struct sched_rt_entity *rt_se;
 	int i;
 	/* sizeof(rt_rq) is a pointer size: these are arrays of pointers. */
 	tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
 	if (!tg->rt_rq)
 		goto err;
 	tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
 	if (!tg->rt_se)
 		goto err;
 	init_rt_bandwidth(&tg->rt_bandwidth,
 			ktime_to_ns(def_rt_bandwidth.rt_period), 0);
 	for_each_possible_cpu(i) {
 		/* Allocate each CPU's structures on that CPU's node. */
 		rt_rq = kzalloc_node(sizeof(struct rt_rq),
 				     GFP_KERNEL, cpu_to_node(i));
 		if (!rt_rq)
 			goto err;
 		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
 				     GFP_KERNEL, cpu_to_node(i));
 		if (!rt_se)
 			goto err_free_rq;
 		init_rt_rq(rt_rq, cpu_rq(i));
 		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
 		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
 	}
 	return 1;
 err_free_rq:
 	/* This rt_rq has not been stored in tg yet, so free it here. */
 	kfree(rt_rq);
 err:
 	return 0;
 }
 #else /* CONFIG_RT_GROUP_SCHED */
 #define rt_entity_is_task(rt_se) (1)
@ -47,6 +217,12 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 	return &rq->rt;
 }
 /* !CONFIG_RT_GROUP_SCHED stub: intentionally empty. */
 void free_rt_sched_group(struct task_group *tg) { }
 /*
 * !CONFIG_RT_GROUP_SCHED stub: allocates nothing and returns 1, the same
 * value the CONFIG_RT_GROUP_SCHED version returns on success.
 */
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	return 1;
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 #ifdef CONFIG_SMP
@ -556,6 +732,28 @@ static void enable_runtime(struct rq *rq)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 /*
 * Notifier callback keyed on CPU_* hotplug actions; @hcpu carries the CPU
 * number in the pointer value.
 *
 * Calls disable_runtime() on that CPU's runqueue when the CPU is about to
 * go down, and enable_runtime() when it came online or the down attempt
 * failed.  Returns NOTIFY_OK for the handled actions and NOTIFY_DONE for
 * everything else.
 */
 int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	int cpu = (int)(long)hcpu;
 	int verdict = NOTIFY_OK;
 	switch (action) {
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		disable_runtime(cpu_rq(cpu));
 		break;
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		enable_runtime(cpu_rq(cpu));
 		break;
 	default:
 		/* Not an action this callback handles. */
 		verdict = NOTIFY_DONE;
 		break;
 	}
 	return verdict;
 }
 static int balance_runtime(struct rt_rq *rt_rq)
 {
 	int more = 0;
@ -648,7 +846,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (rt_rq->rt_throttled)
 		return rt_rq_throttled(rt_rq);
-	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
+	if (runtime >= sched_rt_period(rt_rq))
 		return 0;
 	balance_runtime(rt_rq);
@ -957,8 +1155,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 }
 /*
- * Put task to the end of the run list without the overhead of dequeue
+ * Put task to the head or the end of the run list without the overhead of
- * followed by enqueue.
+ * dequeue followed by enqueue.
 */
 static void
 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
@ -1002,6 +1200,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 	cpu = task_cpu(p);
 	if (p->rt.nr_cpus_allowed == 1)
 		goto out;
 	/* For anything but wake ups, just return the task_cpu */
 	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
 		goto out;
@ -1178,8 +1379,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 /* Only try algorithms three times */
 #define RT_MAX_TRIES 3
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
@ -1653,13 +1852,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 		pull_rt_task(rq);
 }
 /*
 * Set up the per-CPU local_cpu_mask: for every possible CPU, call
 * zalloc_cpumask_var_node() with GFP_KERNEL on that CPU's node
 * (cpu_to_node()).  The allocation result is not checked here.
 */
-static inline void init_sched_rt_class(void)
+void init_sched_rt_class(void)
 {
 	unsigned int i;
-	for_each_possible_cpu(i)
+	for_each_possible_cpu(i) {
 		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
 					GFP_KERNEL, cpu_to_node(i));
 	}
 }
 #endif /* CONFIG_SMP */
@ -1800,7 +2000,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 		return 0;
 }
-static const struct sched_class rt_sched_class = {
+const struct sched_class rt_sched_class = {
 	.next			= &fair_sched_class,
 	.enqueue_task		= enqueue_task_rt,
 	.dequeue_task		= dequeue_task_rt,
@ -1835,7 +2035,7 @@ static const struct sched_class rt_sched_class = {
 #ifdef CONFIG_SCHED_DEBUG
 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
-static void print_rt_stats(struct seq_file *m, int cpu)
+void print_rt_stats(struct seq_file *m, int cpu)
 {
 	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@ -0,0 +1,111 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include "sched.h"
 /*
 * bump this up when changing the output format or the meaning of an existing
 * format, so that tools can adapt (or abort)
 */
 #define SCHEDSTAT_VERSION 15
 /*
 * seq_file show callback for the "schedstat" proc entry.
 *
 * Prints a "version" line and a "timestamp" line (jiffies), then one
 * "cpuN" line of runqueue counters per online CPU.  With CONFIG_SMP, each
 * CPU line is followed by one "domainN" line for every domain visited by
 * for_each_domain(), carrying the domain's CPU mask and its counters.
 *
 * Returns 0 on success, or -ENOMEM if the temporary mask buffer cannot be
 * allocated.
 */
 static int show_schedstat(struct seq_file *seq, void *v)
 {
 	int cpu;
 	/*
 	 * NOTE(review): 9 bytes per 32 CPUs looks like 8 hex digits plus one
 	 * separator/terminator -- confirm against cpumask_scnprintf().
 	 */
 	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
 	char *mask_str = kmalloc(mask_len, GFP_KERNEL);
 	if (mask_str == NULL)
 		return -ENOMEM;
 	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
 	seq_printf(seq, "timestamp %lu\n", jiffies);
 	for_each_online_cpu(cpu) {
 		struct rq *rq = cpu_rq(cpu);
 #ifdef CONFIG_SMP
 		struct sched_domain *sd;
 		int dcount = 0;
 #endif
 		/* runqueue-specific stats */
 		seq_printf(seq,
 		    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
 		    cpu, rq->yld_count,
 		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
 		    rq->ttwu_count, rq->ttwu_local,
 		    rq->rq_cpu_time,
 		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
 		seq_printf(seq, "\n");
 #ifdef CONFIG_SMP
 		/* domain-specific stats */
 		rcu_read_lock();
 		for_each_domain(cpu, sd) {
 			enum cpu_idle_type itype;
 			cpumask_scnprintf(mask_str, mask_len,
 					  sched_domain_span(sd));
 			seq_printf(seq, "domain%d %s", dcount++, mask_str);
 			/* eight lb_* counters for each idle type */
 			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
 					itype++) {
 				seq_printf(seq, " %u %u %u %u %u %u %u %u",
 				    sd->lb_count[itype],
 				    sd->lb_balanced[itype],
 				    sd->lb_failed[itype],
 				    sd->lb_imbalance[itype],
 				    sd->lb_gained[itype],
 				    sd->lb_hot_gained[itype],
 				    sd->lb_nobusyq[itype],
 				    sd->lb_nobusyg[itype]);
 			}
 			/* twelve more per-domain counters end the line */
 			seq_printf(seq,
 				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
 			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
 			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
 			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
 			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
 			    sd->ttwu_move_balance);
 		}
 		rcu_read_unlock();
 #endif
 	}
 	kfree(mask_str);
 	return 0;
 }
 /*
 * Open handler for the "schedstat" proc entry.
 *
 * Allocates an output buffer of one page plus one page per 32 online CPUs
 * up front and, once single_open() succeeded, installs it as the buffer of
 * the seq_file found in file->private_data.
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or the
 * error from single_open() (the buffer is freed in that case).
 */
 static int schedstat_open(struct inode *inode, struct file *file)
 {
 	unsigned int buf_size = PAGE_SIZE * (1 + num_online_cpus() / 32);
 	struct seq_file *sf;
 	char *prealloc;
 	int err;
 	prealloc = kmalloc(buf_size, GFP_KERNEL);
 	if (!prealloc)
 		return -ENOMEM;
 	err = single_open(file, show_schedstat, NULL);
 	if (err) {
 		kfree(prealloc);
 		return err;
 	}
 	sf = file->private_data;
 	sf->buf = prealloc;
 	sf->size = buf_size;
 	return 0;
 }
 /*
 * File operations for the "schedstat" proc entry: opening goes through
 * schedstat_open(); read, llseek and release use seq_read(), seq_lseek()
 * and single_release().
 */
 static const struct file_operations proc_schedstat_operations = {
 	.open    = schedstat_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
 	.release = single_release,
 };
 /*
 * Init hook, registered through module_init() below: creates the
 * "schedstat" proc entry backed by proc_schedstat_operations.  The result
 * of proc_create() is not checked, so this always returns 0.
 *
 * NOTE(review): mode 0 and a NULL parent are passed -- presumably default
 * permissions directly under the proc root; confirm against proc_create().
 */
 static int __init proc_schedstat_init(void)
 {
 	proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
 	return 0;
 }
 module_init(proc_schedstat_init);
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@ -1,108 +1,5 @@
 #ifdef CONFIG_SCHEDSTATS
 /*
 * bump this up when changing the output format or the meaning of an existing
 * format, so that tools can adapt (or abort)
 */
 #define SCHEDSTAT_VERSION 15
 /*
 * seq_file show callback for the "schedstat" proc entry.
 *
 * Prints a "version" line and a "timestamp" line (jiffies), then one
 * "cpuN" line of runqueue counters per online CPU.  With CONFIG_SMP, each
 * CPU line is followed by one "domainN" line for every domain visited by
 * for_each_domain(), carrying the domain's CPU mask and its counters.
 *
 * Returns 0 on success, or -ENOMEM if the temporary mask buffer cannot be
 * allocated.
 */
 static int show_schedstat(struct seq_file *seq, void *v)
 {
 	int cpu;
 	/*
 	 * NOTE(review): 9 bytes per 32 CPUs looks like 8 hex digits plus one
 	 * separator/terminator -- confirm against cpumask_scnprintf().
 	 */
 	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
 	char *mask_str = kmalloc(mask_len, GFP_KERNEL);
 	if (mask_str == NULL)
 		return -ENOMEM;
 	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
 	seq_printf(seq, "timestamp %lu\n", jiffies);
 	for_each_online_cpu(cpu) {
 		struct rq *rq = cpu_rq(cpu);
 #ifdef CONFIG_SMP
 		struct sched_domain *sd;
 		int dcount = 0;
 #endif
 		/* runqueue-specific stats */
 		seq_printf(seq,
 		    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
 		    cpu, rq->yld_count,
 		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
 		    rq->ttwu_count, rq->ttwu_local,
 		    rq->rq_cpu_time,
 		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
 		seq_printf(seq, "\n");
 #ifdef CONFIG_SMP
 		/* domain-specific stats */
 		rcu_read_lock();
 		for_each_domain(cpu, sd) {
 			enum cpu_idle_type itype;
 			cpumask_scnprintf(mask_str, mask_len,
 					  sched_domain_span(sd));
 			seq_printf(seq, "domain%d %s", dcount++, mask_str);
 			/* eight lb_* counters for each idle type */
 			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
 					itype++) {
 				seq_printf(seq, " %u %u %u %u %u %u %u %u",
 				    sd->lb_count[itype],
 				    sd->lb_balanced[itype],
 				    sd->lb_failed[itype],
 				    sd->lb_imbalance[itype],
 				    sd->lb_gained[itype],
 				    sd->lb_hot_gained[itype],
 				    sd->lb_nobusyq[itype],
 				    sd->lb_nobusyg[itype]);
 			}
 			/* twelve more per-domain counters end the line */
 			seq_printf(seq,
 				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
 			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
 			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
 			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
 			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
 			    sd->ttwu_move_balance);
 		}
 		rcu_read_unlock();
 #endif
 	}
 	kfree(mask_str);
 	return 0;
 }
 /*
 * Open handler for the "schedstat" proc entry.
 *
 * Allocates an output buffer of one page plus one page per 32 online CPUs
 * up front and, once single_open() succeeded, installs it as the buffer of
 * the seq_file found in file->private_data.
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or the
 * error from single_open() (the buffer is freed in that case).
 */
 static int schedstat_open(struct inode *inode, struct file *file)
 {
 	unsigned int buf_size = PAGE_SIZE * (1 + num_online_cpus() / 32);
 	struct seq_file *sf;
 	char *prealloc;
 	int err;
 	prealloc = kmalloc(buf_size, GFP_KERNEL);
 	if (!prealloc)
 		return -ENOMEM;
 	err = single_open(file, show_schedstat, NULL);
 	if (err) {
 		kfree(prealloc);
 		return err;
 	}
 	sf = file->private_data;
 	sf->buf = prealloc;
 	sf->size = buf_size;
 	return 0;
 }
 /*
 * File operations for the "schedstat" proc entry: opening goes through
 * schedstat_open(); read, llseek and release use seq_read(), seq_lseek()
 * and single_release().
 */
 static const struct file_operations proc_schedstat_operations = {
 	.open    = schedstat_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
 	.release = single_release,
 };
 /*
 * Init hook, registered through module_init() below: creates the
 * "schedstat" proc entry backed by proc_schedstat_operations.  The result
 * of proc_create() is not checked, so this always returns 0.
 *
 * NOTE(review): mode 0 and a NULL parent are passed -- presumably default
 * permissions directly under the proc root; confirm against proc_create().
 */
 static int __init proc_schedstat_init(void)
 {
 	proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
 	return 0;
 }
 module_init(proc_schedstat_init);
 /*
 * Expects runqueue lock to be held for atomicity of update
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@ -1,3 +1,5 @@
 #include "sched.h"
 /*
 * stop-task scheduling class.
 *
@ -80,7 +82,7 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task)
 /*
 * Simple, special scheduling class for the per-CPU stop tasks:
 */
-static const struct sched_class stop_sched_class = {
+const struct sched_class stop_sched_class = {
 	.next			= &rt_sched_class,
 	.enqueue_task		= enqueue_task_stop,
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@ -296,6 +296,15 @@ void tick_nohz_stop_sched_tick(int inidle)
 	cpu = smp_processor_id();
 	ts = &per_cpu(tick_cpu_sched, cpu);
 	/*
 	 * Update the idle state in the scheduler domain hierarchy
 	 * when tick_nohz_stop_sched_tick() is called from the idle loop.
 	 * State will be updated to busy during the first busy tick after
 	 * exiting idle.
 	 */
 	if (inidle)
 		set_cpu_sd_state_idle();
 	/*
 	 * Call to tick_nohz_start_idle stops the last_update_time from being
 	 * updated. Thus, it must not be called in the event we are called from