dect
/
linux-2.6
Archived
13
0
Fork 0

Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into cputime-tip

Conflicts:
	drivers/cpufreq/cpufreq_conservative.c
	drivers/cpufreq/cpufreq_ondemand.c
	drivers/macintosh/rack-meter.c
	fs/proc/stat.c
	fs/proc/uptime.c
	kernel/sched/core.c
This commit is contained in:
Martin Schwidefsky 2011-12-19 19:23:15 +01:00
commit 612ef28a04
29 changed files with 2608 additions and 2375 deletions

View File

@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data)
j = 0; j = 0;
for_each_online_cpu(i) { for_each_online_cpu(i) {
os_data->os_cpu[j].per_cpu_user = os_data->os_cpu[j].per_cpu_user =
cputime_to_jiffies(kstat_cpu(i).cpustat.user); cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
os_data->os_cpu[j].per_cpu_nice = os_data->os_cpu[j].per_cpu_nice =
cputime_to_jiffies(kstat_cpu(i).cpustat.nice); cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
os_data->os_cpu[j].per_cpu_system = os_data->os_cpu[j].per_cpu_system =
cputime_to_jiffies(kstat_cpu(i).cpustat.system); cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
os_data->os_cpu[j].per_cpu_idle = os_data->os_cpu[j].per_cpu_idle =
cputime_to_jiffies(kstat_cpu(i).cpustat.idle); cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
os_data->os_cpu[j].per_cpu_irq = os_data->os_cpu[j].per_cpu_irq =
cputime_to_jiffies(kstat_cpu(i).cpustat.irq); cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
os_data->os_cpu[j].per_cpu_softirq = os_data->os_cpu[j].per_cpu_softirq =
cputime_to_jiffies(kstat_cpu(i).cpustat.softirq); cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
os_data->os_cpu[j].per_cpu_iowait = os_data->os_cpu[j].per_cpu_iowait =
cputime_to_jiffies(kstat_cpu(i).cpustat.iowait); cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
os_data->os_cpu[j].per_cpu_steal = os_data->os_cpu[j].per_cpu_steal =
cputime_to_jiffies(kstat_cpu(i).cpustat.steal); cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
os_data->os_cpu[j].cpu_id = i; os_data->os_cpu[j].cpu_id = i;
j++; j++;
} }

View File

@ -218,7 +218,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define safe_address (__per_cpu_offset[0]) #define safe_address (__per_cpu_offset[0])
#else #else
#define safe_address (kstat_cpu(0).cpustat.user) #define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
#endif #endif
/* /*

View File

@ -95,26 +95,26 @@ static struct dbs_tuners {
.freq_step = 5, .freq_step = 5,
}; };
static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
cputime64_t *wall)
{ {
cputime64_t idle_time; u64 idle_time;
cputime64_t cur_wall_time; u64 cur_wall_time;
cputime64_t busy_time; u64 busy_time;
cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
busy_time = kstat_cpu(cpu).cpustat.user;
busy_time += kstat_cpu(cpu).cpustat.system; busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
busy_time += kstat_cpu(cpu).cpustat.irq; busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
busy_time += kstat_cpu(cpu).cpustat.softirq; busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
busy_time += kstat_cpu(cpu).cpustat.steal; busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
busy_time += kstat_cpu(cpu).cpustat.nice; busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
idle_time = cur_wall_time - busy_time; idle_time = cur_wall_time - busy_time;
if (wall) if (wall)
*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); *wall = jiffies_to_usecs(cur_wall_time);
return (cputime64_t)jiffies_to_usecs(idle_time); return jiffies_to_usecs(idle_time);
} }
static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@ -271,7 +271,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
dbs_info->prev_cpu_idle = get_cpu_idle_time(j, dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&dbs_info->prev_cpu_wall); &dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice) if (dbs_tuners_ins.ignore_nice)
dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
} }
return count; return count;
} }
@ -361,11 +361,11 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
j_dbs_info->prev_cpu_idle = cur_idle_time; j_dbs_info->prev_cpu_idle = cur_idle_time;
if (dbs_tuners_ins.ignore_nice) { if (dbs_tuners_ins.ignore_nice) {
cputime64_t cur_nice; u64 cur_nice;
unsigned long cur_nice_jiffies; unsigned long cur_nice_jiffies;
cur_nice = kstat_cpu(j).cpustat.nice - cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
j_dbs_info->prev_cpu_nice; j_dbs_info->prev_cpu_nice;
/* /*
* Assumption: nice time between sampling periods will * Assumption: nice time between sampling periods will
* be less than 2^32 jiffies for 32 bit sys * be less than 2^32 jiffies for 32 bit sys
@ -373,7 +373,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
cur_nice_jiffies = (unsigned long) cur_nice_jiffies = (unsigned long)
cputime64_to_jiffies64(cur_nice); cputime64_to_jiffies64(cur_nice);
j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
idle_time += jiffies_to_usecs(cur_nice_jiffies); idle_time += jiffies_to_usecs(cur_nice_jiffies);
} }
@ -500,10 +500,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&j_dbs_info->prev_cpu_wall); &j_dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice) { if (dbs_tuners_ins.ignore_nice)
j_dbs_info->prev_cpu_nice = j_dbs_info->prev_cpu_nice =
kstat_cpu(j).cpustat.nice; kcpustat_cpu(j).cpustat[CPUTIME_NICE];
}
} }
this_dbs_info->down_skip = 0; this_dbs_info->down_skip = 0;
this_dbs_info->requested_freq = policy->cur; this_dbs_info->requested_freq = policy->cur;

View File

@ -119,26 +119,26 @@ static struct dbs_tuners {
.powersave_bias = 0, .powersave_bias = 0,
}; };
static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
cputime64_t *wall)
{ {
cputime64_t idle_time; u64 idle_time;
cputime64_t cur_wall_time; u64 cur_wall_time;
cputime64_t busy_time; u64 busy_time;
cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
busy_time = kstat_cpu(cpu).cpustat.user;
busy_time += kstat_cpu(cpu).cpustat.system; busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
busy_time += kstat_cpu(cpu).cpustat.irq; busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
busy_time += kstat_cpu(cpu).cpustat.softirq; busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
busy_time += kstat_cpu(cpu).cpustat.steal; busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
busy_time += kstat_cpu(cpu).cpustat.nice; busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
idle_time = cur_wall_time - busy_time; idle_time = cur_wall_time - busy_time;
if (wall) if (wall)
*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time); *wall = jiffies_to_usecs(cur_wall_time);
return (cputime64_t)jiffies_to_usecs(idle_time); return jiffies_to_usecs(idle_time);
} }
static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@ -344,7 +344,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
dbs_info->prev_cpu_idle = get_cpu_idle_time(j, dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&dbs_info->prev_cpu_wall); &dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice) if (dbs_tuners_ins.ignore_nice)
dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
} }
return count; return count;
@ -454,11 +454,11 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
j_dbs_info->prev_cpu_iowait = cur_iowait_time; j_dbs_info->prev_cpu_iowait = cur_iowait_time;
if (dbs_tuners_ins.ignore_nice) { if (dbs_tuners_ins.ignore_nice) {
cputime64_t cur_nice; u64 cur_nice;
unsigned long cur_nice_jiffies; unsigned long cur_nice_jiffies;
cur_nice = kstat_cpu(j).cpustat.nice - cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
j_dbs_info->prev_cpu_nice; j_dbs_info->prev_cpu_nice;
/* /*
* Assumption: nice time between sampling periods will * Assumption: nice time between sampling periods will
* be less than 2^32 jiffies for 32 bit sys * be less than 2^32 jiffies for 32 bit sys
@ -466,7 +466,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
cur_nice_jiffies = (unsigned long) cur_nice_jiffies = (unsigned long)
cputime64_to_jiffies64(cur_nice); cputime64_to_jiffies64(cur_nice);
j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
idle_time += jiffies_to_usecs(cur_nice_jiffies); idle_time += jiffies_to_usecs(cur_nice_jiffies);
} }
@ -645,10 +645,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
&j_dbs_info->prev_cpu_wall); &j_dbs_info->prev_cpu_wall);
if (dbs_tuners_ins.ignore_nice) { if (dbs_tuners_ins.ignore_nice)
j_dbs_info->prev_cpu_nice = j_dbs_info->prev_cpu_nice =
kstat_cpu(j).cpustat.nice; kcpustat_cpu(j).cpustat[CPUTIME_NICE];
}
} }
this_dbs_info->cpu = cpu; this_dbs_info->cpu = cpu;
this_dbs_info->rate_mult = 1; this_dbs_info->rate_mult = 1;

View File

@ -81,12 +81,13 @@ static int rackmeter_ignore_nice;
*/ */
static inline cputime64_t get_cpu_idle_time(unsigned int cpu) static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
{ {
cputime64_t retval; u64 retval;
retval = kstat_cpu(cpu).cpustat.idle + kstat_cpu(cpu).cpustat.iowait; retval = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE] +
kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
if (rackmeter_ignore_nice) if (rackmeter_ignore_nice)
retval += kstat_cpu(cpu).cpustat.nice; retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
return retval; return retval;
} }

View File

@ -22,14 +22,13 @@
#define arch_idle_time(cpu) 0 #define arch_idle_time(cpu) 0
#endif #endif
static cputime64_t get_idle_time(int cpu) static u64 get_idle_time(int cpu)
{ {
u64 idle_time = get_cpu_idle_time_us(cpu, NULL); u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
cputime64_t idle;
if (idle_time == -1ULL) { if (idle_time == -1ULL) {
/* !NO_HZ so we can rely on cpustat.idle */ /* !NO_HZ so we can rely on cpustat.idle */
idle = kstat_cpu(cpu).cpustat.idle; idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
idle += arch_idle_time(cpu); idle += arch_idle_time(cpu);
} else } else
idle = nsecs_to_jiffies64(1000 * idle_time); idle = nsecs_to_jiffies64(1000 * idle_time);
@ -37,14 +36,13 @@ static cputime64_t get_idle_time(int cpu)
return idle; return idle;
} }
static cputime64_t get_iowait_time(int cpu) static u64 get_iowait_time(int cpu)
{ {
u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL); u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
cputime64_t iowait;
if (iowait_time == -1ULL) if (iowait_time == -1ULL)
/* !NO_HZ so we can rely on cpustat.iowait */ /* !NO_HZ so we can rely on cpustat.iowait */
iowait = kstat_cpu(cpu).cpustat.iowait; iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
else else
iowait = nsecs_to_jiffies64(1000 * iowait_time); iowait = nsecs_to_jiffies64(1000 * iowait_time);
@ -55,8 +53,8 @@ static int show_stat(struct seq_file *p, void *v)
{ {
int i, j; int i, j;
unsigned long jif; unsigned long jif;
cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; u64 user, nice, system, idle, iowait, irq, softirq, steal;
cputime64_t guest, guest_nice; u64 guest, guest_nice;
u64 sum = 0; u64 sum = 0;
u64 sum_softirq = 0; u64 sum_softirq = 0;
unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
@ -69,18 +67,16 @@ static int show_stat(struct seq_file *p, void *v)
jif = boottime.tv_sec; jif = boottime.tv_sec;
for_each_possible_cpu(i) { for_each_possible_cpu(i) {
user += kstat_cpu(i).cpustat.user; user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
nice += kstat_cpu(i).cpustat.nice; nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
system += kstat_cpu(i).cpustat.system; system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
idle += get_idle_time(i); idle += get_idle_time(i);
iowait += get_iowait_time(i); iowait += get_iowait_time(i);
irq += kstat_cpu(i).cpustat.irq; irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
softirq += kstat_cpu(i).cpustat.softirq; softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
steal += kstat_cpu(i).cpustat.steal; steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
guest += kstat_cpu(i).cpustat.guest; guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
guest_nice += kstat_cpu(i).cpustat.guest_nice; guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
sum += kstat_cpu_irqs_sum(i);
sum += arch_irq_stat_cpu(i);
for (j = 0; j < NR_SOFTIRQS; j++) { for (j = 0; j < NR_SOFTIRQS; j++) {
unsigned int softirq_stat = kstat_softirqs_cpu(j, i); unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
@ -105,16 +101,16 @@ static int show_stat(struct seq_file *p, void *v)
(unsigned long long)cputime64_to_clock_t(guest_nice)); (unsigned long long)cputime64_to_clock_t(guest_nice));
for_each_online_cpu(i) { for_each_online_cpu(i) {
/* Copy values here to work around gcc-2.95.3, gcc-2.96 */ /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
user = kstat_cpu(i).cpustat.user; user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
nice = kstat_cpu(i).cpustat.nice; nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
system = kstat_cpu(i).cpustat.system; system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
idle = get_idle_time(i); idle = get_idle_time(i);
iowait = get_iowait_time(i); iowait = get_iowait_time(i);
irq = kstat_cpu(i).cpustat.irq; irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
softirq = kstat_cpu(i).cpustat.softirq; softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
steal = kstat_cpu(i).cpustat.steal; steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
guest = kstat_cpu(i).cpustat.guest; guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
guest_nice = kstat_cpu(i).cpustat.guest_nice; guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
seq_printf(p, seq_printf(p,
"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
"%llu\n", "%llu\n",

View File

@ -11,14 +11,14 @@ static int uptime_proc_show(struct seq_file *m, void *v)
{ {
struct timespec uptime; struct timespec uptime;
struct timespec idle; struct timespec idle;
cputime64_t idletime; u64 idletime;
u64 nsec; u64 nsec;
u32 rem; u32 rem;
int i; int i;
idletime = 0; idletime = 0;
for_each_possible_cpu(i) for_each_possible_cpu(i)
idletime += kstat_cpu(i).cpustat.idle; idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
do_posix_clock_monotonic_gettime(&uptime); do_posix_clock_monotonic_gettime(&uptime);
monotonic_to_bootbased(&uptime); monotonic_to_bootbased(&uptime);

View File

@ -6,6 +6,7 @@
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/irq.h> #include <asm/irq.h>
#include <asm/cputime.h> #include <asm/cputime.h>
@ -15,21 +16,25 @@
* used by rstatd/perfmeter * used by rstatd/perfmeter
*/ */
struct cpu_usage_stat { enum cpu_usage_stat {
cputime64_t user; CPUTIME_USER,
cputime64_t nice; CPUTIME_NICE,
cputime64_t system; CPUTIME_SYSTEM,
cputime64_t softirq; CPUTIME_SOFTIRQ,
cputime64_t irq; CPUTIME_IRQ,
cputime64_t idle; CPUTIME_IDLE,
cputime64_t iowait; CPUTIME_IOWAIT,
cputime64_t steal; CPUTIME_STEAL,
cputime64_t guest; CPUTIME_GUEST,
cputime64_t guest_nice; CPUTIME_GUEST_NICE,
NR_STATS,
};
struct kernel_cpustat {
u64 cpustat[NR_STATS];
}; };
struct kernel_stat { struct kernel_stat {
struct cpu_usage_stat cpustat;
#ifndef CONFIG_GENERIC_HARDIRQS #ifndef CONFIG_GENERIC_HARDIRQS
unsigned int irqs[NR_IRQS]; unsigned int irqs[NR_IRQS];
#endif #endif
@ -38,10 +43,13 @@ struct kernel_stat {
}; };
DECLARE_PER_CPU(struct kernel_stat, kstat); DECLARE_PER_CPU(struct kernel_stat, kstat);
DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
#define kstat_cpu(cpu) per_cpu(kstat, cpu)
/* Must have preemption disabled for this to be meaningful. */ /* Must have preemption disabled for this to be meaningful. */
#define kstat_this_cpu __get_cpu_var(kstat) #define kstat_this_cpu (&__get_cpu_var(kstat))
#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat))
#define kstat_cpu(cpu) per_cpu(kstat, cpu)
#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
extern unsigned long long nr_context_switches(void); extern unsigned long long nr_context_switches(void);

View File

@ -10,6 +10,8 @@
#define _INCLUDE_GUARD_LATENCYTOP_H_ #define _INCLUDE_GUARD_LATENCYTOP_H_
#include <linux/compiler.h> #include <linux/compiler.h>
struct task_struct;
#ifdef CONFIG_LATENCYTOP #ifdef CONFIG_LATENCYTOP
#define LT_SAVECOUNT 32 #define LT_SAVECOUNT 32
@ -23,7 +25,6 @@ struct latency_record {
}; };
struct task_struct;
extern int latencytop_enabled; extern int latencytop_enabled;
void __account_scheduler_latency(struct task_struct *task, int usecs, int inter); void __account_scheduler_latency(struct task_struct *task, int usecs, int inter);

View File

@ -273,9 +273,11 @@ extern int runqueue_is_locked(int cpu);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern void select_nohz_load_balancer(int stop_tick); extern void select_nohz_load_balancer(int stop_tick);
extern void set_cpu_sd_state_idle(void);
extern int get_nohz_timer_target(void); extern int get_nohz_timer_target(void);
#else #else
static inline void select_nohz_load_balancer(int stop_tick) { } static inline void select_nohz_load_balancer(int stop_tick) { }
static inline void set_cpu_sd_state_idle(void) { }
#endif #endif
/* /*
@ -901,6 +903,10 @@ struct sched_group_power {
* single CPU. * single CPU.
*/ */
unsigned int power, power_orig; unsigned int power, power_orig;
/*
* Number of busy cpus in this group.
*/
atomic_t nr_busy_cpus;
}; };
struct sched_group { struct sched_group {
@ -925,6 +931,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
return to_cpumask(sg->cpumask); return to_cpumask(sg->cpumask);
} }
/**
 * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
 * @group: The group whose first cpu is to be returned.
 *
 * Thin wrapper: looks up the group's cpumask with sched_group_cpus() and
 * hands it to cpumask_first().
 *
 * Return: whatever cpumask_first() yields for that mask.
 * NOTE(review): the result for an empty mask is decided by cpumask_first()
 * and is not visible here - confirm before relying on it.
 */
static inline unsigned int group_first_cpu(struct sched_group *group)
{
return cpumask_first(sched_group_cpus(group));
}
struct sched_domain_attr { struct sched_domain_attr {
int relax_domain_level; int relax_domain_level;
}; };
@ -1315,8 +1330,8 @@ struct task_struct {
* older sibling, respectively. (p->father can be replaced with * older sibling, respectively. (p->father can be replaced with
* p->real_parent->pid) * p->real_parent->pid)
*/ */
struct task_struct *real_parent; /* real parent process */ struct task_struct __rcu *real_parent; /* real parent process */
struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */ struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
/* /*
* children/sibling forms the list of my natural children * children/sibling forms the list of my natural children
*/ */

View File

@ -330,6 +330,13 @@ DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
TP_PROTO(struct task_struct *tsk, u64 delay), TP_PROTO(struct task_struct *tsk, u64 delay),
TP_ARGS(tsk, delay)); TP_ARGS(tsk, delay));
/*
 * Tracepoint for accounting blocked time (time the task spends in the
 * uninterruptible state). Built from sched_stat_template and takes the
 * task plus the delay value to record.
 */
DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
TP_PROTO(struct task_struct *tsk, u64 delay),
TP_ARGS(tsk, delay));
/* /*
* Tracepoint for accounting runtime (time the task is executing * Tracepoint for accounting runtime (time the task is executing
* on a CPU). * on a CPU).

View File

@ -2,16 +2,15 @@
# Makefile for the linux kernel. # Makefile for the linux kernel.
# #
obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ obj-y = fork.o exec_domain.o panic.o printk.o \
cpu.o exit.o itimer.o time.o softirq.o resource.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \
sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \ signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o extable.o params.o posix-timers.o \ rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o sched_clock.o cred.o \ notifier.o ksysfs.o cred.o \
async.o range.o async.o range.o groups.o
obj-y += groups.o
ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files # Do not trace debug files and internal ftrace files
@ -20,10 +19,11 @@ CFLAGS_REMOVE_lockdep_proc.o = -pg
CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
CFLAGS_REMOVE_irq_work.o = -pg CFLAGS_REMOVE_irq_work.o = -pg
endif endif
obj-y += sched/
obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
@ -99,7 +99,6 @@ obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_CPU_PM) += cpu_pm.o
@ -110,15 +109,6 @@ obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# needed for x86 only. Why this used to be enabled for all architectures is beyond
# me. I suspect most platforms don't need this, but until we know that for sure
# I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k
# to get a correct value for the wait-channel (WCHAN in ps). --davidm
CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
endif
$(obj)/configs.o: $(obj)/config_data.h $(obj)/configs.o: $(obj)/config_data.h
# config_data.h contains the same information as ikconfig.h but gzipped. # config_data.h contains the same information as ikconfig.h but gzipped.

20
kernel/sched/Makefile Normal file
View File

@ -0,0 +1,20 @@
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_clock.o = -pg
endif
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# needed for x86 only. Why this used to be enabled for all architectures is beyond
# me. I suspect most platforms don't need this, but until we know that for sure
# I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k
# to get a correct value for the wait-channel (WCHAN in ps). --davidm
CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
endif
obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o
obj-$(CONFIG_SMP) += cpupri.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o

View File

@ -1,15 +1,19 @@
#ifdef CONFIG_SCHED_AUTOGROUP #ifdef CONFIG_SCHED_AUTOGROUP
#include "sched.h"
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <linux/utsname.h> #include <linux/utsname.h>
#include <linux/security.h>
#include <linux/export.h>
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
static struct autogroup autogroup_default; static struct autogroup autogroup_default;
static atomic_t autogroup_seq_nr; static atomic_t autogroup_seq_nr;
static void __init autogroup_init(struct task_struct *init_task) void __init autogroup_init(struct task_struct *init_task)
{ {
autogroup_default.tg = &root_task_group; autogroup_default.tg = &root_task_group;
kref_init(&autogroup_default.kref); kref_init(&autogroup_default.kref);
@ -17,7 +21,7 @@ static void __init autogroup_init(struct task_struct *init_task)
init_task->signal->autogroup = &autogroup_default; init_task->signal->autogroup = &autogroup_default;
} }
static inline void autogroup_free(struct task_group *tg) void autogroup_free(struct task_group *tg)
{ {
kfree(tg->autogroup); kfree(tg->autogroup);
} }
@ -59,10 +63,6 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p)
return ag; return ag;
} }
#ifdef CONFIG_RT_GROUP_SCHED
static void free_rt_sched_group(struct task_group *tg);
#endif
static inline struct autogroup *autogroup_create(void) static inline struct autogroup *autogroup_create(void)
{ {
struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
@ -108,8 +108,7 @@ out_fail:
return autogroup_kref_get(&autogroup_default); return autogroup_kref_get(&autogroup_default);
} }
static inline bool bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
task_wants_autogroup(struct task_struct *p, struct task_group *tg)
{ {
if (tg != &root_task_group) if (tg != &root_task_group)
return false; return false;
@ -127,22 +126,6 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
return true; return true;
} }
static inline bool task_group_is_autogroup(struct task_group *tg)
{
return !!tg->autogroup;
}
static inline struct task_group *
autogroup_task_group(struct task_struct *p, struct task_group *tg)
{
int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
if (enabled && task_wants_autogroup(p, tg))
return p->signal->autogroup->tg;
return tg;
}
static void static void
autogroup_move_group(struct task_struct *p, struct autogroup *ag) autogroup_move_group(struct task_struct *p, struct autogroup *ag)
{ {
@ -263,7 +246,7 @@ out:
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SCHED_DEBUG #ifdef CONFIG_SCHED_DEBUG
static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) int autogroup_path(struct task_group *tg, char *buf, int buflen)
{ {
if (!task_group_is_autogroup(tg)) if (!task_group_is_autogroup(tg))
return 0; return 0;

View File

@ -1,5 +1,8 @@
#ifdef CONFIG_SCHED_AUTOGROUP #ifdef CONFIG_SCHED_AUTOGROUP
#include <linux/kref.h>
#include <linux/rwsem.h>
struct autogroup { struct autogroup {
/* /*
* reference doesn't mean how many thread attach to this * reference doesn't mean how many thread attach to this
@ -13,9 +16,28 @@ struct autogroup {
int nice; int nice;
}; };
static inline bool task_group_is_autogroup(struct task_group *tg); extern void autogroup_init(struct task_struct *init_task);
extern void autogroup_free(struct task_group *tg);
static inline bool task_group_is_autogroup(struct task_group *tg)
{
return !!tg->autogroup;
}
extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
static inline struct task_group * static inline struct task_group *
autogroup_task_group(struct task_struct *p, struct task_group *tg); autogroup_task_group(struct task_struct *p, struct task_group *tg)
{
int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
if (enabled && task_wants_autogroup(p, tg))
return p->signal->autogroup->tg;
return tg;
}
extern int autogroup_path(struct task_group *tg, char *buf, int buflen);
#else /* !CONFIG_SCHED_AUTOGROUP */ #else /* !CONFIG_SCHED_AUTOGROUP */

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/* /*
* kernel/sched_cpupri.c * kernel/sched/cpupri.c
* *
* CPU priority management * CPU priority management
* *
@ -28,7 +28,7 @@
*/ */
#include <linux/gfp.h> #include <linux/gfp.h>
#include "sched_cpupri.h" #include "cpupri.h"
/* Convert between a 140 based task->prio, and our 102 based cpupri */ /* Convert between a 140 based task->prio, and our 102 based cpupri */
static int convert_prio(int prio) static int convert_prio(int prio)

View File

@ -1,5 +1,5 @@
/* /*
* kernel/time/sched_debug.c * kernel/sched/debug.c
* *
* Print the CFS rbtree * Print the CFS rbtree
* *
@ -16,6 +16,8 @@
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <linux/utsname.h> #include <linux/utsname.h>
#include "sched.h"
static DEFINE_SPINLOCK(sched_debug_lock); static DEFINE_SPINLOCK(sched_debug_lock);
/* /*
@ -373,7 +375,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
return 0; return 0;
} }
static void sysrq_sched_debug_show(void) void sysrq_sched_debug_show(void)
{ {
sched_debug_show(NULL, NULL); sched_debug_show(NULL, NULL);
} }

File diff suppressed because it is too large Load Diff

View File

@ -3,13 +3,13 @@
* them to run sooner, but does not allow tons of sleepers to * them to run sooner, but does not allow tons of sleepers to
* rip the spread apart. * rip the spread apart.
*/ */
SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
/* /*
* Place new tasks ahead so that they do not starve already running * Place new tasks ahead so that they do not starve already running
* tasks * tasks
*/ */
SCHED_FEAT(START_DEBIT, 1) SCHED_FEAT(START_DEBIT, true)
/* /*
* Based on load and program behaviour, see if it makes sense to place * Based on load and program behaviour, see if it makes sense to place
@ -17,54 +17,54 @@ SCHED_FEAT(START_DEBIT, 1)
* improve cache locality. Typically used with SYNC wakeups as * improve cache locality. Typically used with SYNC wakeups as
* generated by pipes and the like, see also SYNC_WAKEUPS. * generated by pipes and the like, see also SYNC_WAKEUPS.
*/ */
SCHED_FEAT(AFFINE_WAKEUPS, 1) SCHED_FEAT(AFFINE_WAKEUPS, true)
/* /*
* Prefer to schedule the task we woke last (assuming it failed * Prefer to schedule the task we woke last (assuming it failed
* wakeup-preemption), since it's likely going to consume data we * wakeup-preemption), since it's likely going to consume data we
* touched, increases cache locality. * touched, increases cache locality.
*/ */
SCHED_FEAT(NEXT_BUDDY, 0) SCHED_FEAT(NEXT_BUDDY, false)
/* /*
* Prefer to schedule the task that ran last (when we did * Prefer to schedule the task that ran last (when we did
* wake-preempt) as that likely will touch the same data, increases * wake-preempt) as that likely will touch the same data, increases
* cache locality. * cache locality.
*/ */
SCHED_FEAT(LAST_BUDDY, 1) SCHED_FEAT(LAST_BUDDY, true)
/* /*
* Consider buddies to be cache hot, decreases the likelihood of a * Consider buddies to be cache hot, decreases the likelihood of a
* cache buddy being migrated away, increases cache locality. * cache buddy being migrated away, increases cache locality.
*/ */
SCHED_FEAT(CACHE_HOT_BUDDY, 1) SCHED_FEAT(CACHE_HOT_BUDDY, true)
/* /*
* Use arch dependent cpu power functions * Use arch dependent cpu power functions
*/ */
SCHED_FEAT(ARCH_POWER, 0) SCHED_FEAT(ARCH_POWER, false)
SCHED_FEAT(HRTICK, 0) SCHED_FEAT(HRTICK, false)
SCHED_FEAT(DOUBLE_TICK, 0) SCHED_FEAT(DOUBLE_TICK, false)
SCHED_FEAT(LB_BIAS, 1) SCHED_FEAT(LB_BIAS, true)
/* /*
* Spin-wait on mutex acquisition when the mutex owner is running on * Spin-wait on mutex acquisition when the mutex owner is running on
* another cpu -- assumes that when the owner is running, it will soon * another cpu -- assumes that when the owner is running, it will soon
* release the lock. Decreases scheduling overhead. * release the lock. Decreases scheduling overhead.
*/ */
SCHED_FEAT(OWNER_SPIN, 1) SCHED_FEAT(OWNER_SPIN, true)
/* /*
* Decrement CPU power based on time not spent running tasks * Decrement CPU power based on time not spent running tasks
*/ */
SCHED_FEAT(NONTASK_POWER, 1) SCHED_FEAT(NONTASK_POWER, true)
/* /*
* Queue remote wakeups on the target CPU and process them * Queue remote wakeups on the target CPU and process them
* using the scheduler IPI. Reduces rq->lock contention/bounces. * using the scheduler IPI. Reduces rq->lock contention/bounces.
*/ */
SCHED_FEAT(TTWU_QUEUE, 1) SCHED_FEAT(TTWU_QUEUE, true)
SCHED_FEAT(FORCE_SD_OVERLAP, 0) SCHED_FEAT(FORCE_SD_OVERLAP, false)
SCHED_FEAT(RT_RUNTIME_SHARE, 1) SCHED_FEAT(RT_RUNTIME_SHARE, true)

View File

@ -1,3 +1,5 @@
#include "sched.h"
/* /*
* idle-task scheduling class. * idle-task scheduling class.
* *
@ -71,7 +73,7 @@ static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task
/* /*
* Simple, special scheduling class for the per-CPU idle tasks: * Simple, special scheduling class for the per-CPU idle tasks:
*/ */
static const struct sched_class idle_sched_class = { const struct sched_class idle_sched_class = {
/* .next is NULL */ /* .next is NULL */
/* no enqueue/yield_task for idle tasks */ /* no enqueue/yield_task for idle tasks */

View File

@ -3,7 +3,92 @@
* policies) * policies)
*/ */
#include "sched.h"
#include <linux/slab.h>
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
struct rt_bandwidth def_rt_bandwidth;
/*
 * hrtimer callback for the period timer embedded in a struct rt_bandwidth.
 *
 * The timer is forwarded by rt_b->rt_period until hrtimer_forward() reports
 * no further overrun; every non-zero overrun count is passed on to
 * do_sched_rt_period_timer().  The timer is re-armed unless the most recent
 * do_sched_rt_period_timer() call returned non-zero.
 */
static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
	struct rt_bandwidth *rt_b;
	int idle = 0;

	rt_b = container_of(timer, struct rt_bandwidth, rt_period_timer);

	for (;;) {
		ktime_t now = hrtimer_cb_get_time(timer);
		int overrun = hrtimer_forward(timer, now, rt_b->rt_period);

		if (!overrun)
			break;

		idle = do_sched_rt_period_timer(rt_b, overrun);
	}

	if (idle)
		return HRTIMER_NORESTART;

	return HRTIMER_RESTART;
}
/*
 * Initialise @rt_b.
 *
 * @period is given in nanoseconds and stored as a ktime value; @runtime is
 * stored unchanged.  The runtime lock is initialised and the period hrtimer
 * is set up on CLOCK_MONOTONIC in relative mode with sched_rt_period_timer()
 * as its callback.  The timer is only initialised here, it is not started.
 */
void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
{
rt_b->rt_period = ns_to_ktime(period);
rt_b->rt_runtime = runtime;
raw_spin_lock_init(&rt_b->rt_runtime_lock);
hrtimer_init(&rt_b->rt_period_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
/* The callback is assigned after hrtimer_init() has set up the timer. */
rt_b->rt_period_timer.function = sched_rt_period_timer;
}
/*
 * Start the period timer of @rt_b when it is needed.
 *
 * Returns without doing anything when rt_bandwidth_enabled() reports false,
 * when the configured runtime is RUNTIME_INF, or when the period timer is
 * already active.  Otherwise the timer is started through
 * start_bandwidth_timer() while holding rt_b->rt_runtime_lock.
 */
static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	struct hrtimer *period_timer = &rt_b->rt_period_timer;

	if (!rt_bandwidth_enabled() ||
	    rt_b->rt_runtime == RUNTIME_INF ||
	    hrtimer_active(period_timer))
		return;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	start_bandwidth_timer(period_timer, rt_b->rt_period);
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}
/*
 * Put @rt_rq into its initial, empty state.
 *
 * All MAX_RT_PRIO priority lists are initialised empty and their bitmap bits
 * cleared; bit MAX_RT_PRIO itself is set as the search delimiter.  With
 * CONFIG_SMP the highest-priority tracking, migration bookkeeping and the
 * pushable-task list are reset as well.  Finally the runtime accounting
 * fields are zeroed and the runtime lock is initialised.
 *
 * @rq is not referenced by this function.
 */
void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
{
	struct rt_prio_array *prio_array = &rt_rq->active;
	int prio;

	for (prio = 0; prio < MAX_RT_PRIO; prio++) {
		INIT_LIST_HEAD(&prio_array->queue[prio]);
		__clear_bit(prio, prio_array->bitmap);
	}
	/* delimiter for bitsearch: */
	__set_bit(MAX_RT_PRIO, prio_array->bitmap);

#ifdef CONFIG_SMP
	rt_rq->highest_prio.curr = MAX_RT_PRIO;
	rt_rq->highest_prio.next = MAX_RT_PRIO;
	rt_rq->rt_nr_migratory = 0;
	rt_rq->overloaded = 0;
	plist_head_init(&rt_rq->pushable_tasks);
#endif

	rt_rq->rt_time = 0;
	rt_rq->rt_throttled = 0;
	rt_rq->rt_runtime = 0;
	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
}
#ifdef CONFIG_RT_GROUP_SCHED #ifdef CONFIG_RT_GROUP_SCHED
/* Tear down @rt_b by cancelling its period timer. */
static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
{
hrtimer_cancel(&rt_b->rt_period_timer);
}
#define rt_entity_is_task(rt_se) (!(rt_se)->my_q) #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
@ -25,6 +110,91 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
return rt_se->rt_rq; return rt_se->rt_rq;
} }
/*
 * Release the RT scheduling data attached to @tg.
 *
 * The bandwidth timer is only cancelled when the rt_se array exists.  The
 * per-CPU rt_rq and rt_se objects are freed for every possible CPU, then the
 * two pointer arrays themselves.  Either array may be NULL, so a partially
 * allocated group can be passed in.
 */
void free_rt_sched_group(struct task_group *tg)
{
	struct rt_rq **rt_rqs = tg->rt_rq;
	struct sched_rt_entity **rt_ses = tg->rt_se;
	int cpu;

	if (rt_ses)
		destroy_rt_bandwidth(&tg->rt_bandwidth);

	for_each_possible_cpu(cpu) {
		if (rt_rqs)
			kfree(rt_rqs[cpu]);
		if (rt_ses)
			kfree(rt_ses[cpu]);
	}

	kfree(rt_rqs);
	kfree(rt_ses);
}
/*
 * Wire up the per-CPU RT runqueue and scheduling entity of a task group.
 *
 * @rt_rq is bound to @tg and to the runqueue of @cpu, and both @rt_rq and
 * @rt_se are recorded in the group's per-CPU arrays.  @rt_se may be NULL, in
 * which case nothing further is done.  Otherwise the entity is queued on the
 * CPU's own rt runqueue when there is no @parent, or on the parent's group
 * runqueue (parent->my_q) when there is one, and it owns @rt_rq as its my_q.
 */
void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
		struct sched_rt_entity *rt_se, int cpu,
		struct sched_rt_entity *parent)
{
	struct rq *rq = cpu_rq(cpu);

	rt_rq->highest_prio.curr = MAX_RT_PRIO;
	rt_rq->rt_nr_boosted = 0;
	rt_rq->rq = rq;
	rt_rq->tg = tg;

	tg->rt_rq[cpu] = rt_rq;
	tg->rt_se[cpu] = rt_se;

	if (!rt_se)
		return;

	rt_se->rt_rq = parent ? parent->my_q : &rq->rt;
	rt_se->my_q = rt_rq;
	rt_se->parent = parent;
	INIT_LIST_HEAD(&rt_se->run_list);
}
/*
 * Allocate and initialise the RT scheduling data of task group @tg.
 *
 * Two nr_cpu_ids-sized pointer arrays (tg->rt_rq, tg->rt_se) are allocated,
 * the group's bandwidth is initialised with the default period and a runtime
 * of 0, and for every possible CPU an rt_rq and a sched_rt_entity are
 * allocated on that CPU's node and linked below the matching entity of
 * @parent.  @parent is dereferenced unconditionally and must not be NULL.
 *
 * Returns 1 on success and 0 on allocation failure.  On failure everything
 * already stored in @tg is left in place; only an rt_rq that has not yet
 * been recorded in tg->rt_rq[] is freed here.
 * NOTE(review): presumably the caller releases the remainder through
 * free_rt_sched_group() - confirm against the call site.
 */
int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	struct rt_rq *rt_rq;
	struct sched_rt_entity *rt_se;
	int i;

	/* kcalloc() zeroes the arrays and checks the size multiplication. */
	tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(*tg->rt_rq), GFP_KERNEL);
	if (!tg->rt_rq)
		goto err;
	tg->rt_se = kcalloc(nr_cpu_ids, sizeof(*tg->rt_se), GFP_KERNEL);
	if (!tg->rt_se)
		goto err;

	init_rt_bandwidth(&tg->rt_bandwidth,
			ktime_to_ns(def_rt_bandwidth.rt_period), 0);

	for_each_possible_cpu(i) {
		rt_rq = kzalloc_node(sizeof(struct rt_rq),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_rq)
			goto err;

		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_se)
			goto err_free_rq;

		init_rt_rq(rt_rq, cpu_rq(i));
		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
	}

	return 1;

err_free_rq:
	/* rt_rq for this CPU is not yet reachable through tg; free it now. */
	kfree(rt_rq);
err:
	return 0;
}
#else /* CONFIG_RT_GROUP_SCHED */ #else /* CONFIG_RT_GROUP_SCHED */
#define rt_entity_is_task(rt_se) (1) #define rt_entity_is_task(rt_se) (1)
@ -47,6 +217,12 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
return &rq->rt; return &rq->rt;
} }
/* Empty variant from the #else branch of CONFIG_RT_GROUP_SCHED: nothing is freed. */
void free_rt_sched_group(struct task_group *tg) { }
/*
 * Variant from the #else branch of CONFIG_RT_GROUP_SCHED: allocates nothing,
 * ignores both arguments and always returns 1.
 */
int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
return 1;
}
#endif /* CONFIG_RT_GROUP_SCHED */ #endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
@ -556,6 +732,28 @@ static void enable_runtime(struct rq *rq)
raw_spin_unlock_irqrestore(&rq->lock, flags); raw_spin_unlock_irqrestore(&rq->lock, flags);
} }
/*
 * Notifier callback reacting to CPU hotplug actions.
 *
 * @hcpu carries the CPU number encoded as a pointer value.  Runtime is
 * disabled on that CPU's runqueue when the CPU is about to go down, and
 * enabled again when taking it down failed or when it came online.  The
 * *_FROZEN variants are treated like their plain counterparts.
 *
 * Returns NOTIFY_OK for handled actions and NOTIFY_DONE for all others.
 */
int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	int cpu = (int)(long)hcpu;
	int ret = NOTIFY_OK;

	switch (action) {
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		disable_runtime(cpu_rq(cpu));
		break;

	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		enable_runtime(cpu_rq(cpu));
		break;

	default:
		ret = NOTIFY_DONE;
		break;
	}

	return ret;
}
static int balance_runtime(struct rt_rq *rt_rq) static int balance_runtime(struct rt_rq *rt_rq)
{ {
int more = 0; int more = 0;
@ -648,7 +846,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
if (rt_rq->rt_throttled) if (rt_rq->rt_throttled)
return rt_rq_throttled(rt_rq); return rt_rq_throttled(rt_rq);
if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) if (runtime >= sched_rt_period(rt_rq))
return 0; return 0;
balance_runtime(rt_rq); balance_runtime(rt_rq);
@ -957,8 +1155,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
} }
/* /*
* Put task to the end of the run list without the overhead of dequeue * Put task to the head or the end of the run list without the overhead of
* followed by enqueue. * dequeue followed by enqueue.
*/ */
static void static void
requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head) requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
@ -1002,6 +1200,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
cpu = task_cpu(p); cpu = task_cpu(p);
if (p->rt.nr_cpus_allowed == 1)
goto out;
/* For anything but wake ups, just return the task_cpu */ /* For anything but wake ups, just return the task_cpu */
if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
goto out; goto out;
@ -1178,8 +1379,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
/* Only try algorithms three times */ /* Only try algorithms three times */
#define RT_MAX_TRIES 3 #define RT_MAX_TRIES 3
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{ {
if (!task_running(rq, p) && if (!task_running(rq, p) &&
@ -1653,13 +1852,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
pull_rt_task(rq); pull_rt_task(rq);
} }
static inline void init_sched_rt_class(void) void init_sched_rt_class(void)
{ {
unsigned int i; unsigned int i;
for_each_possible_cpu(i) for_each_possible_cpu(i) {
zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
GFP_KERNEL, cpu_to_node(i)); GFP_KERNEL, cpu_to_node(i));
}
} }
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
@ -1800,7 +2000,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
return 0; return 0;
} }
static const struct sched_class rt_sched_class = { const struct sched_class rt_sched_class = {
.next = &fair_sched_class, .next = &fair_sched_class,
.enqueue_task = enqueue_task_rt, .enqueue_task = enqueue_task_rt,
.dequeue_task = dequeue_task_rt, .dequeue_task = dequeue_task_rt,
@ -1835,7 +2035,7 @@ static const struct sched_class rt_sched_class = {
#ifdef CONFIG_SCHED_DEBUG #ifdef CONFIG_SCHED_DEBUG
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
static void print_rt_stats(struct seq_file *m, int cpu) void print_rt_stats(struct seq_file *m, int cpu)
{ {
rt_rq_iter_t iter; rt_rq_iter_t iter;
struct rt_rq *rt_rq; struct rt_rq *rt_rq;

1136
kernel/sched/sched.h Normal file

File diff suppressed because it is too large Load Diff

111
kernel/sched/stats.c Normal file
View File

@ -0,0 +1,111 @@
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include "sched.h"
/*
* bump this up when changing the output format or the meaning of an existing
* format, so that tools can adapt (or abort)
*/
#define SCHEDSTAT_VERSION 15
/*
 * seq_file show routine producing the scheduler statistics text.
 *
 * Output: a "version" line, a "timestamp" line holding the current jiffies
 * value, then for every online CPU one "cpuN" line of runqueue counters.
 * With CONFIG_SMP each CPU line is followed by one "domainN" line per
 * scheduling domain of that CPU, containing the domain's CPU mask, eight
 * load-balance counters per idle type and twelve further per-domain
 * counters.
 *
 * Returns 0 on success or -ENOMEM when the mask buffer cannot be allocated.
 */
static int show_schedstat(struct seq_file *seq, void *v)
{
int cpu;
/* 9 bytes of mask text are reserved per group of 32 possible CPUs. */
int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
char *mask_str = kmalloc(mask_len, GFP_KERNEL);
if (mask_str == NULL)
return -ENOMEM;
seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
seq_printf(seq, "timestamp %lu\n", jiffies);
for_each_online_cpu(cpu) {
struct rq *rq = cpu_rq(cpu);
#ifdef CONFIG_SMP
struct sched_domain *sd;
/* Running index used to number this CPU's domain lines. */
int dcount = 0;
#endif
/* runqueue-specific stats */
seq_printf(seq,
"cpu%d %u %u %u %u %u %u %llu %llu %lu",
cpu, rq->yld_count,
rq->sched_switch, rq->sched_count, rq->sched_goidle,
rq->ttwu_count, rq->ttwu_local,
rq->rq_cpu_time,
rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
seq_printf(seq, "\n");
#ifdef CONFIG_SMP
/* domain-specific stats */
/* The domain walk below runs inside an RCU read-side section. */
rcu_read_lock();
for_each_domain(cpu, sd) {
enum cpu_idle_type itype;
cpumask_scnprintf(mask_str, mask_len,
sched_domain_span(sd));
seq_printf(seq, "domain%d %s", dcount++, mask_str);
/* One group of eight load-balance counters per idle type. */
for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
itype++) {
seq_printf(seq, " %u %u %u %u %u %u %u %u",
sd->lb_count[itype],
sd->lb_balanced[itype],
sd->lb_failed[itype],
sd->lb_imbalance[itype],
sd->lb_gained[itype],
sd->lb_hot_gained[itype],
sd->lb_nobusyq[itype],
sd->lb_nobusyg[itype]);
}
seq_printf(seq,
" %u %u %u %u %u %u %u %u %u %u %u %u\n",
sd->alb_count, sd->alb_failed, sd->alb_pushed,
sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
sd->ttwu_wake_remote, sd->ttwu_move_affine,
sd->ttwu_move_balance);
}
rcu_read_unlock();
#endif
}
kfree(mask_str);
return 0;
}
/*
 * open() handler for the schedstat file.
 *
 * An output buffer of one page plus one further page per 32 online CPUs is
 * allocated up front, the file is opened as a single-record seq_file backed
 * by show_schedstat(), and the buffer is then installed in that seq_file.
 * If single_open() fails the buffer is freed again.
 *
 * Returns 0 on success, -ENOMEM when the buffer cannot be allocated, or the
 * error reported by single_open().
 */
static int schedstat_open(struct inode *inode, struct file *file)
{
	unsigned int buf_size = PAGE_SIZE * (1 + num_online_cpus() / 32);
	char *out_buf = kmalloc(buf_size, GFP_KERNEL);
	struct seq_file *seq;
	int err;

	if (!out_buf)
		return -ENOMEM;

	err = single_open(file, show_schedstat, NULL);
	if (err) {
		kfree(out_buf);
		return err;
	}

	/* Hand the pre-sized buffer over to the freshly opened seq_file. */
	seq = file->private_data;
	seq->buf = out_buf;
	seq->size = buf_size;

	return 0;
}
/*
 * File operations for the "schedstat" proc entry: opening goes through
 * schedstat_open(); read, llseek and release use the seq_file helpers
 * seq_read(), seq_lseek() and single_release().
 */
static const struct file_operations proc_schedstat_operations = {
.open = schedstat_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/*
 * Init-time hook that creates the "schedstat" proc entry (mode 0, no parent
 * directory) using proc_schedstat_operations.  The result of proc_create()
 * is not checked; the function always returns 0.
 */
static int __init proc_schedstat_init(void)
{
proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
return 0;
}
module_init(proc_schedstat_init);

View File

@ -1,108 +1,5 @@
#ifdef CONFIG_SCHEDSTATS #ifdef CONFIG_SCHEDSTATS
/*
* bump this up when changing the output format or the meaning of an existing
* format, so that tools can adapt (or abort)
*/
#define SCHEDSTAT_VERSION 15
/*
 * seq_file show routine producing the scheduler statistics text.
 *
 * Output: a "version" line, a "timestamp" line holding the current jiffies
 * value, then for every online CPU one "cpuN" line of runqueue counters.
 * With CONFIG_SMP each CPU line is followed by one "domainN" line per
 * scheduling domain of that CPU, containing the domain's CPU mask, eight
 * load-balance counters per idle type and twelve further per-domain
 * counters.
 *
 * Returns 0 on success or -ENOMEM when the mask buffer cannot be allocated.
 */
static int show_schedstat(struct seq_file *seq, void *v)
{
int cpu;
/* 9 bytes of mask text are reserved per group of 32 possible CPUs. */
int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
char *mask_str = kmalloc(mask_len, GFP_KERNEL);
if (mask_str == NULL)
return -ENOMEM;
seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
seq_printf(seq, "timestamp %lu\n", jiffies);
for_each_online_cpu(cpu) {
struct rq *rq = cpu_rq(cpu);
#ifdef CONFIG_SMP
struct sched_domain *sd;
/* Running index used to number this CPU's domain lines. */
int dcount = 0;
#endif
/* runqueue-specific stats */
seq_printf(seq,
"cpu%d %u %u %u %u %u %u %llu %llu %lu",
cpu, rq->yld_count,
rq->sched_switch, rq->sched_count, rq->sched_goidle,
rq->ttwu_count, rq->ttwu_local,
rq->rq_cpu_time,
rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
seq_printf(seq, "\n");
#ifdef CONFIG_SMP
/* domain-specific stats */
/* The domain walk below runs inside an RCU read-side section. */
rcu_read_lock();
for_each_domain(cpu, sd) {
enum cpu_idle_type itype;
cpumask_scnprintf(mask_str, mask_len,
sched_domain_span(sd));
seq_printf(seq, "domain%d %s", dcount++, mask_str);
/* One group of eight load-balance counters per idle type. */
for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
itype++) {
seq_printf(seq, " %u %u %u %u %u %u %u %u",
sd->lb_count[itype],
sd->lb_balanced[itype],
sd->lb_failed[itype],
sd->lb_imbalance[itype],
sd->lb_gained[itype],
sd->lb_hot_gained[itype],
sd->lb_nobusyq[itype],
sd->lb_nobusyg[itype]);
}
seq_printf(seq,
" %u %u %u %u %u %u %u %u %u %u %u %u\n",
sd->alb_count, sd->alb_failed, sd->alb_pushed,
sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
sd->ttwu_wake_remote, sd->ttwu_move_affine,
sd->ttwu_move_balance);
}
rcu_read_unlock();
#endif
}
kfree(mask_str);
return 0;
}
/*
 * open() handler for the schedstat file.
 *
 * An output buffer of one page plus one further page per 32 online CPUs is
 * allocated up front, the file is opened as a single-record seq_file backed
 * by show_schedstat(), and the buffer is then installed in that seq_file.
 * If single_open() fails the buffer is freed again.
 *
 * Returns 0 on success, -ENOMEM when the buffer cannot be allocated, or the
 * error reported by single_open().
 */
static int schedstat_open(struct inode *inode, struct file *file)
{
	unsigned int buf_size = PAGE_SIZE * (1 + num_online_cpus() / 32);
	char *out_buf = kmalloc(buf_size, GFP_KERNEL);
	struct seq_file *seq;
	int err;

	if (!out_buf)
		return -ENOMEM;

	err = single_open(file, show_schedstat, NULL);
	if (err) {
		kfree(out_buf);
		return err;
	}

	/* Hand the pre-sized buffer over to the freshly opened seq_file. */
	seq = file->private_data;
	seq->buf = out_buf;
	seq->size = buf_size;

	return 0;
}
/*
 * File operations for the "schedstat" proc entry: opening goes through
 * schedstat_open(); read, llseek and release use the seq_file helpers
 * seq_read(), seq_lseek() and single_release().
 */
static const struct file_operations proc_schedstat_operations = {
.open = schedstat_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/*
 * Init-time hook that creates the "schedstat" proc entry (mode 0, no parent
 * directory) using proc_schedstat_operations.  The result of proc_create()
 * is not checked; the function always returns 0.
 */
static int __init proc_schedstat_init(void)
{
proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
return 0;
}
module_init(proc_schedstat_init);
/* /*
* Expects runqueue lock to be held for atomicity of update * Expects runqueue lock to be held for atomicity of update

View File

@ -1,3 +1,5 @@
#include "sched.h"
/* /*
* stop-task scheduling class. * stop-task scheduling class.
* *
@ -80,7 +82,7 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task)
/* /*
* Simple, special scheduling class for the per-CPU stop tasks: * Simple, special scheduling class for the per-CPU stop tasks:
*/ */
static const struct sched_class stop_sched_class = { const struct sched_class stop_sched_class = {
.next = &rt_sched_class, .next = &rt_sched_class,
.enqueue_task = enqueue_task_stop, .enqueue_task = enqueue_task_stop,

View File

@ -296,6 +296,15 @@ void tick_nohz_stop_sched_tick(int inidle)
cpu = smp_processor_id(); cpu = smp_processor_id();
ts = &per_cpu(tick_cpu_sched, cpu); ts = &per_cpu(tick_cpu_sched, cpu);
/*
* Update the idle state in the scheduler domain hierarchy
* when tick_nohz_stop_sched_tick() is called from the idle loop.
* State will be updated to busy during the first busy tick after
* exiting idle.
*/
if (inidle)
set_cpu_sd_state_idle();
/* /*
* Call to tick_nohz_start_idle stops the last_update_time from being * Call to tick_nohz_start_idle stops the last_update_time from being
* updated. Thus, it must not be called in the event we are called from * updated. Thus, it must not be called in the event we are called from