From 9d85f21c94f7f7a84d0ba686c58aa6d9da58fdbb Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Thu, 4 Oct 2012 13:18:29 +0200
Subject: sched: Track the runnable average on a per-task entity basis

Instead of tracking the average of the load parented by a cfs_rq, we can
track entity load directly, with the load for a given cfs_rq then being
the sum of its children.

To do this we represent the historical contribution to runnable average
within each trailing 1024us of execution as the coefficients of a
geometric series.

We can express this for a given task t as:

  runnable_sum(t) = \Sum u_i * y^i
  runnable_avg_period(t) = \Sum 1024 * y^i

  load(t) = weight_t * runnable_sum(t) / runnable_avg_period(t)

Where: u_i is the usage in the i-th most recent 1024us period
(approximately 1ms), and y is chosen such that y^k = 1/2. We currently
choose k = 32, which roughly translates to a scheduling period.

Signed-off-by: Paul Turner
Reviewed-by: Ben Segall
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/20120823141506.372695337@google.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a02df2..418fc6d8a4d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1095,6 +1095,16 @@ struct load_weight {
 	unsigned long weight, inv_weight;
 };
 
+struct sched_avg {
+	/*
+	 * These sums represent an infinite geometric series and so are bound
+	 * above by 1024/(1-y). Thus we only need a u32 to store them for all
+	 * choices of y < 1-2^(-32)*1024.
+	 */
+	u32 runnable_avg_sum, runnable_avg_period;
+	u64 last_runnable_update;
+};
+
 #ifdef CONFIG_SCHEDSTATS
 struct sched_statistics {
 	u64 wait_start;
@@ -1155,6 +1165,9 @@ struct sched_entity {
 	/* rq "owned" by this entity/group: */
 	struct cfs_rq *my_q;
 #endif
+#ifdef CONFIG_SMP
+	struct sched_avg avg;
+#endif
 };
 
 struct sched_rt_entity {
--
cgit v1.2.3

From 2dac754e10a5d41d94d2d2365c0345d4f215a266 Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Thu, 4 Oct 2012 13:18:30 +0200
Subject: sched: Aggregate load contributed by task entities on parenting cfs_rq

For a given task t, we can compute its contribution to load as:

  task_load(t) = runnable_avg(t) * weight(t)

On a parenting cfs_rq we can then aggregate:

  runnable_load(cfs_rq) = \Sum task_load(t), for all runnable children t

Maintain this bottom-up, with task entities adding their contributed
load to the parenting cfs_rq sum. When a task entity's load changes we
add the same delta to the maintained sum.

Signed-off-by: Paul Turner
Reviewed-by: Ben Segall
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/20120823141506.514678907@google.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 418fc6d8a4d..81d8b1ba410 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1103,6 +1103,7 @@ struct sched_avg {
 	 */
 	u32 runnable_avg_sum, runnable_avg_period;
 	u64 last_runnable_update;
+	unsigned long load_avg_contrib;
 };
 
 #ifdef CONFIG_SCHEDSTATS
--
cgit v1.2.3
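
As a concrete illustration of the geometric series above, here is a
minimal userspace sketch of the per-entity average. It uses floating
point for clarity; the kernel's actual implementation uses 32-bit
fixed-point arithmetic and a precomputed table, and all names below are
illustrative only:

#include <stdio.h>

#define PERIOD_US 1024

static const double y = 0.978572062;	/* 2^(-1/32), so y^32 == 1/2 */

struct demo_avg {
	double runnable_avg_sum;	/* \Sum u_i * y^i */
	double runnable_avg_period;	/* \Sum 1024 * y^i */
};

/* Fold one elapsed 1024us period in; ran_us of it was spent runnable. */
static void update_avg(struct demo_avg *sa, unsigned int ran_us)
{
	sa->runnable_avg_sum = sa->runnable_avg_sum * y + ran_us;
	sa->runnable_avg_period = sa->runnable_avg_period * y + PERIOD_US;
}

int main(void)
{
	struct demo_avg sa = { 0.0, 0.0 };
	int i;

	for (i = 0; i < 1000; i++)	/* long busy stretch: average -> 1.0 */
		update_avg(&sa, PERIOD_US);
	printf("busy: %.3f\n", sa.runnable_avg_sum / sa.runnable_avg_period);

	for (i = 0; i < 32; i++)	/* 32 idle periods: roughly halves */
		update_avg(&sa, 0);
	printf("after ~32ms idle: %.3f\n",
	       sa.runnable_avg_sum / sa.runnable_avg_period);
	return 0;
}

With y^32 = 1/2, the tracked average of a task that goes idle halves
roughly every 32ms, which is the decay rate the changelog ties to the
scheduling period.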

From 9ee474f55664ff63111c843099d365e7ecffb56f Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Thu, 4 Oct 2012 13:18:30 +0200
Subject: sched: Maintain the load contribution of blocked entities

We are currently maintaining:

  runnable_load(cfs_rq) = \Sum task_load(t)

for all runnable children t of cfs_rq. While this can be naturally
updated for tasks in a runnable state (as they are scheduled), it does
not account for the load contributed by blocked task entities.

This can be solved by introducing a separate accounting for blocked
load:

  blocked_load(cfs_rq) = \Sum runnable(b) * weight(b)

Obviously we do not want to iterate over all blocked entities to account
for their decay; we instead observe that:

  runnable_load(t) = \Sum p_i * y^i

and that to account for an additional idle period we only need to
compute:

  y * runnable_load(t)

This means that we can decay all blocked entities at once by evaluating:

  blocked_load(cfs_rq)' = y * blocked_load(cfs_rq)

Finally we maintain a decay counter so that when a sleeping entity
re-awakens we can determine how much of its load should be removed from
the blocked sum.

Signed-off-by: Paul Turner
Reviewed-by: Ben Segall
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/20120823141506.585389902@google.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 81d8b1ba410..b1831accfd8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1103,6 +1103,7 @@ struct sched_avg {
 	 */
 	u32 runnable_avg_sum, runnable_avg_period;
 	u64 last_runnable_update;
+	s64 decay_count;
 	unsigned long load_avg_contrib;
 };
 
--
cgit v1.2.3

From 0a74bef8bed18dc6889e9bc37ea1050a50c86c89 Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Thu, 4 Oct 2012 13:18:30 +0200
Subject: sched: Add an rq migration call-back to sched_class

Since we are now doing bottom-up load accumulation we need explicit
notification when a task has been re-parented so that the old hierarchy
can be updated.

Adds:

  migrate_task_rq(struct task_struct *p, int next_cpu)

(The alternative is to do this out of __set_task_cpu, but it was
suggested that this would be a cleaner encapsulation.)

Signed-off-by: Paul Turner
Reviewed-by: Ben Segall
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/20120823141506.660023400@google.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b1831accfd8..e483ccb08ce 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1061,6 +1061,7 @@ struct sched_class {
 
 #ifdef CONFIG_SMP
 	int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
+	void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
 
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
 	void (*post_schedule) (struct rq *this_rq);
--
cgit v1.2.3
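
Tying the blocked-load changelog above to code, here is a hedged
userspace sketch of the bookkeeping it describes: one multiply per
period decays every blocked entity at once, and the per-entity decay
counter lets a waking entity remove exactly its decayed share. All names
are made up for illustration; the real code lives in kernel/sched/fair.c:

/* build: cc demo.c -lm */
#include <math.h>
#include <stdio.h>

static const double y = 0.978572062;	/* y^32 == 1/2 */

struct demo_cfs_rq {
	double blocked_load;		/* sum of decayed blocked contributions */
	unsigned long decay_seq;	/* number of periods decayed so far */
};

struct demo_entity {
	double load_contrib;		/* contribution at the time it blocked */
	unsigned long decay_count;	/* decay_seq snapshot taken at block time */
};

/* Once per 1024us period: one multiply decays every blocked entity. */
static void decay_blocked(struct demo_cfs_rq *cfs_rq)
{
	cfs_rq->blocked_load *= y;
	cfs_rq->decay_seq++;
}

static void block(struct demo_cfs_rq *cfs_rq, struct demo_entity *se)
{
	se->decay_count = cfs_rq->decay_seq;
	cfs_rq->blocked_load += se->load_contrib;
}

/* On wakeup: remove what is left of the entity's decayed contribution. */
static void unblock(struct demo_cfs_rq *cfs_rq, struct demo_entity *se)
{
	unsigned long missed = cfs_rq->decay_seq - se->decay_count;

	se->load_contrib *= pow(y, missed);	/* catch the sleeper up */
	cfs_rq->blocked_load -= se->load_contrib;
}

int main(void)
{
	struct demo_cfs_rq rq = { 0.0, 0 };
	struct demo_entity se = { 1024.0, 0 };
	int i;

	block(&rq, &se);
	for (i = 0; i < 32; i++)
		decay_blocked(&rq);
	unblock(&rq, &se);	/* removes ~512, leaving blocked_load ~0 */
	printf("remaining blocked load: %f\n", rq.blocked_load);
	return 0;
}

The point of the counter is that the cfs_rq never touches sleeping
entities; a sleeper catches itself up with y^missed on wakeup.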

From f4e26b120b9de84cb627bc7361ba43cfdc51341f Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Thu, 4 Oct 2012 13:18:32 +0200
Subject: sched: Introduce temporary FAIR_GROUP_SCHED dependency for load-tracking

While per-entity load-tracking is generally useful beyond computing
shares distribution, e.g. for runnable-based load-balance (in progress),
governors, power-management, etc., these facilities are not yet
consumers of this data. This may be trivially reverted when the
information is required, but until then we avoid paying the overhead for
calculations we will not use.

Signed-off-by: Paul Turner
Reviewed-by: Ben Segall
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/20120823141507.422162369@google.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e483ccb08ce..e1581a029e3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1168,7 +1168,13 @@ struct sched_entity {
 	/* rq "owned" by this entity/group: */
 	struct cfs_rq *my_q;
 #endif
-#ifdef CONFIG_SMP
+/*
+ * Load-tracking only depends on SMP; the FAIR_GROUP_SCHED dependency below
+ * may be removed when useful for applications beyond shares distribution
+ * (e.g. load-balance).
+ */
+#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
+	/* Per-entity load-tracking */
 	struct sched_avg avg;
 #endif
 };
--
cgit v1.2.3

From dcbf832e5823156e8f155359b47bd108cac8ad68 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 5 Oct 2012 23:07:19 +0200
Subject: vtime: Gather vtime declarations to their own header file

These APIs are scattered around and are going to expand a bit. Let's
create a dedicated header file for sanity.

Signed-off-by: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Thomas Gleixner
Cc: Steven Rostedt
Cc: Paul Gortmaker
---
 include/linux/hardirq.h     | 11 +----------
 include/linux/kernel_stat.h |  9 +--------
 include/linux/vtime.h       | 22 ++++++++++++++++++++++
 3 files changed, 24 insertions(+), 18 deletions(-)
 create mode 100644 include/linux/vtime.h

(limited to 'include')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index cab3da3d094..b083a475423 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -4,6 +4,7 @@
 #include <linux/preempt.h>
 #include <linux/lockdep.h>
 #include <linux/ftrace_irq.h>
+#include <linux/vtime.h>
 #include <asm/hardirq.h>
 
 /*
@@ -129,16 +130,6 @@ extern void synchronize_irq(unsigned int irq);
 # define synchronize_irq(irq) barrier()
 #endif
 
-struct task_struct;
-
-#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
-static inline void vtime_account(struct task_struct *tsk)
-{
-}
-#else
-extern void vtime_account(struct task_struct *tsk);
-#endif
-
 #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
 
 static inline void rcu_nmi_enter(void)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 36d12f0884c..1865b1f2977 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -7,6 +7,7 @@
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
+#include <linux/vtime.h>
 #include <asm/irq.h>
 #include <asm/cputime.h>
 
@@ -130,12 +131,4 @@ extern void account_process_tick(struct task_struct *, int user);
 extern void account_steal_ticks(unsigned long ticks);
 extern void account_idle_ticks(unsigned long ticks);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-extern void vtime_task_switch(struct task_struct *prev);
-extern void vtime_account_system(struct task_struct *tsk);
-extern void vtime_account_idle(struct task_struct *tsk);
-#else
-static inline void vtime_task_switch(struct task_struct *prev) { }
-#endif
-
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
new file mode 100644
index 00000000000..7199c24c820
--- /dev/null
+++ b/include/linux/vtime.h
@@ -0,0 +1,22 @@
+#ifndef _LINUX_KERNEL_VTIME_H
+#define _LINUX_KERNEL_VTIME_H
+
+struct task_struct;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void vtime_task_switch(struct task_struct *prev);
+extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_idle(struct task_struct *tsk);
+#else
+static inline void vtime_task_switch(struct task_struct *prev) { }
+#endif
+
+#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
+static inline void vtime_account(struct task_struct *tsk)
+{
+}
+#else
+extern void vtime_account(struct task_struct *tsk);
+#endif
+
+#endif /* _LINUX_KERNEL_VTIME_H */
--
cgit v1.2.3

From 11113334d1c5dd5355c86e531c29f1202a855c86 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 24 Oct 2012 18:05:51 +0200
Subject: vtime: Make vtime_account_system() irqsafe

vtime_account_system() currently has only one caller, vtime_account(),
which is irq-safe. Now we are going to call it from other places like
kvm where irqs are not always disabled by the time we account the
cputime. So let's make it irqsafe. The arch implementation part is now
prefixed with "__".

The vtime_account_idle() arch implementation is prefixed accordingly to
stay consistent.

Signed-off-by: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Thomas Gleixner
Cc: Steven Rostedt
Cc: Paul Gortmaker
Cc: Tony Luck
Cc: Fenghua Yu
Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Cc: Martin Schwidefsky
Cc: Heiko Carstens
---
 include/linux/vtime.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 7199c24c820..b9fc4f9ab47 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -5,10 +5,12 @@ struct task_struct;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void vtime_task_switch(struct task_struct *prev);
+extern void __vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_system(struct task_struct *tsk);
-extern void vtime_account_idle(struct task_struct *tsk);
+extern void __vtime_account_idle(struct task_struct *tsk);
 #else
 static inline void vtime_task_switch(struct task_struct *prev) { }
+static inline void vtime_account_system(struct task_struct *tsk) { }
 #endif
 
 #if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
--
cgit v1.2.3
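
The include-only diff above shows the new "__"-prefixed declarations;
the irq-safe wrapper itself lives outside this view (presumably in
kernel/sched/cputime.c), so treat the exact shape below as a hedged
sketch rather than the real implementation:

#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/vtime.h>

void vtime_account_system(struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);		/* callers such as kvm may run with irqs enabled */
	__vtime_account_system(tsk);	/* the irq-unsafe, arch-implemented part */
	local_irq_restore(flags);
}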

From b080935c8638e08134629d0a9ebdf35669bec14d Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 5 Oct 2012 23:07:19 +0200
Subject: kvm: Directly account vtime to system on guest switch

Switching to or from guest context is done in ioctl context. So by the
time we call kvm_guest_enter() or kvm_guest_exit() we know we are not
running the idle task. As a result, we can directly account the cputime
using vtime_account_system().

There are three good reasons to do this:

* We avoid some useless checks on guest switch. It optimizes a bit this
  fast path.

* In the case of CONFIG_IRQ_TIME_ACCOUNTING, calling vtime_account()
  checks for irq time to account. This is pointless since we know we are
  not in an irq on guest switch. This is wasting cpu cycles for no good
  reason. vtime_account_system() OTOH is a no-op in this config option.

* We can remove the irq disable/enable around kvm guest switch in s390.

A further optimization may consist in introducing a vtime_account_guest()
that directly calls account_guest_time().

Signed-off-by: Frederic Weisbecker
Cc: Tony Luck
Cc: Fenghua Yu
Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Cc: Heiko Carstens
Cc: Martin Schwidefsky
Cc: Avi Kivity
Cc: Marcelo Tosatti
Cc: Joerg Roedel
Cc: Alexander Graf
Cc: Xiantao Zhang
Cc: Christian Borntraeger
Cc: Cornelia Huck
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Thomas Gleixner
Cc: Steven Rostedt
Cc: Paul Gortmaker
---
 include/linux/kvm_host.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 93bfc9f9815..0e2212fe478 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -737,7 +737,11 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 static inline void kvm_guest_enter(void)
 {
 	BUG_ON(preemptible());
-	vtime_account(current);
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system(current);
 	current->flags |= PF_VCPU;
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
@@ -751,7 +755,11 @@ static inline void kvm_guest_enter(void)
 
 static inline void kvm_guest_exit(void)
 {
-	vtime_account(current);
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system(current);
 	current->flags &= ~PF_VCPU;
 }
--
cgit v1.2.3
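
The "further optimization" the changelog suggests, a
vtime_account_guest() feeding account_guest_time() directly, might look
roughly like the sketch below. No such helper exists in this series: the
name comes from the changelog, the signature is an assumption, and
account_guest_time() is static to kernel/sched/cputime.c, so this would
have to live there:

/* Hypothetical helper, as suggested above. */
static void vtime_account_guest(struct task_struct *tsk, cputime_t delta)
{
	/* Only meaningful between kvm_guest_enter() and kvm_guest_exit(). */
	WARN_ON_ONCE(!(tsk->flags & PF_VCPU));
	/* No separately scaled cputime to hand here; pass the raw delta twice. */
	account_guest_time(tsk, delta, delta);
}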

From fa5058f3b63153e0147ef65bcdb3a4ee63581346 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sat, 6 Oct 2012 04:07:19 +0200
Subject: cputime: Specialize irq vtime hooks

With CONFIG_VIRT_CPU_ACCOUNTING, when vtime_account() is called in irq
entry/exit, we perform a check on the context: if we are interrupting
the idle task we account the pending cputime to idle, otherwise we
account to system time or its sub-areas: tsk->stime, hardirq time,
softirq time, ...

However this check for idle only concerns the hardirq entry and softirq
entry:

* Hardirq may directly interrupt the idle task, in which case we need to
  flush the pending CPU time to idle.

* The idle task may be directly interrupted by a softirq if it calls
  local_bh_enable(). There is probably no such call in any idle task but
  we need to cover every case. Ksoftirqd is not concerned because the
  idle time is flushed on context switch, and softirqs at the end of a
  hardirq already have the idle time flushed from the hardirq entry.

In the other cases we always account to system/irq time:

* On hardirq exit we account the time to hardirq time.
* On softirq exit we account the time to softirq time.

To optimize this and avoid the indirect call to vtime_account() and the
checks it performs, specialize the vtime irq APIs and only perform the
check on irq entry. Irq exit can directly call vtime_account_system().

CONFIG_IRQ_TIME_ACCOUNTING behaviour doesn't change and directly maps to
its own vtime_account() implementation. One may want to take benefit
from the new APIs to optimize irq time accounting as well in the future.

Signed-off-by: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Thomas Gleixner
Cc: Steven Rostedt
Cc: Paul Gortmaker
---
 include/linux/hardirq.h |  4 ++--
 include/linux/vtime.h   | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index b083a475423..624ef3f45c8 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
  */
 #define __irq_enter()					\
 	do {						\
-		vtime_account(current);			\
+		vtime_account_irq_enter(current);	\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
 	} while (0)
@@ -169,7 +169,7 @@ extern void irq_enter(void);
 #define __irq_exit()					\
 	do {						\
 		trace_hardirq_exit();			\
-		vtime_account(current);			\
+		vtime_account_irq_exit(current);	\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
 	} while (0)
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index b9fc4f9ab47..c35c02223da 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -21,4 +21,29 @@ static inline void vtime_account(struct task_struct *tsk)
 extern void vtime_account(struct task_struct *tsk);
 #endif
 
+static inline void vtime_account_irq_enter(struct task_struct *tsk)
+{
+	/*
+	 * Hardirq can interrupt idle task anytime. So we need vtime_account()
+	 * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
+	 * Softirq can also interrupt idle task directly if it calls
+	 * local_bh_enable(). Such a case probably doesn't exist but we never
+	 * know. Ksoftirqd is not concerned because idle time is flushed on
+	 * context switch. Softirqs at the end of hardirqs are also not a
+	 * problem because the idle time is already flushed on hardirq entry.
+	 */
+	vtime_account(tsk);
+}
+
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
+{
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	/* On hard|softirq exit we always account to hard|softirq cputime */
+	__vtime_account_system(tsk);
+#endif
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	vtime_account(tsk);
+#endif
+}
+
 #endif /* _LINUX_KERNEL_VTIME_H */
--
cgit v1.2.3
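
To see why only the entry hook needs the full vtime_account(), consider
the idle check it performs under CONFIG_VIRT_CPU_ACCOUNTING. The arch
implementations are not part of this include-only diff; a simplified
approximation of their logic:

#include <linux/hardirq.h>
#include <linux/sched.h>

void vtime_account(struct task_struct *tsk)
{
	/*
	 * If we interrupted a nested irq or a regular task, the time
	 * accumulated since the last update is system/irq time; only a
	 * first-level interruption of the idle loop flushes to idle.
	 */
	if (in_interrupt() || !is_idle_task(tsk))
		__vtime_account_system(tsk);
	else
		__vtime_account_idle(tsk);
}

On irq exit the answer is already known (the time belongs to
hardirq/softirq), hence the cheaper direct call to
__vtime_account_system().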

From 3e1df4f506836e6bea1ab61cf88c75c8b1840643 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Sat, 6 Oct 2012 05:23:22 +0200
Subject: cputime: Separate irqtime accounting from generic vtime

vtime_account() doesn't have the same role in
CONFIG_VIRT_CPU_ACCOUNTING and CONFIG_IRQ_TIME_ACCOUNTING. In the first
case it handles time accounting in any context. In the second case it
only handles irq time accounting.

So when vtime_account() is called from outside vtime_account_irq_*(),
this call is pointless under CONFIG_IRQ_TIME_ACCOUNTING.

To fix the confusion, change vtime_account() to irqtime_account_irq()
in CONFIG_IRQ_TIME_ACCOUNTING. This way we ensure future vtime_account()
calls won't waste useless cycles in the irqtime APIs.

Signed-off-by: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Thomas Gleixner
Cc: Steven Rostedt
Cc: Paul Gortmaker
---
 include/linux/vtime.h | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index c35c02223da..0c2a2d30302 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -8,17 +8,18 @@ extern void vtime_task_switch(struct task_struct *prev);
 extern void __vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_system(struct task_struct *tsk);
 extern void __vtime_account_idle(struct task_struct *tsk);
+extern void vtime_account(struct task_struct *tsk);
 #else
 static inline void vtime_task_switch(struct task_struct *prev) { }
+static inline void __vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
+static inline void vtime_account(struct task_struct *tsk) { }
 #endif
 
-#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
-static inline void vtime_account(struct task_struct *tsk)
-{
-}
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+extern void irqtime_account_irq(struct task_struct *tsk);
 #else
-extern void vtime_account(struct task_struct *tsk);
+static inline void irqtime_account_irq(struct task_struct *tsk) { }
 #endif
 
 static inline void vtime_account_irq_enter(struct task_struct *tsk)
@@ -33,17 +34,14 @@ static inline void vtime_account_irq_enter(struct task_struct *tsk)
 	 * problem because the idle time is already flushed on hardirq entry.
 	 */
 	vtime_account(tsk);
+	irqtime_account_irq(tsk);
 }
 
 static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	/* On hard|softirq exit we always account to hard|softirq cputime */
 	__vtime_account_system(tsk);
-#endif
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-	vtime_account(tsk);
-#endif
+	irqtime_account_irq(tsk);
 }
--
cgit v1.2.3
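
For the CONFIG_IRQ_TIME_ACCOUNTING side, irqtime_account_irq() is
defined outside this include-only view (in kernel/sched/cputime.c); a
simplified sketch of its sched_clock-based bookkeeping, omitting the
seqcount protection the real version needs for 32-bit readers:

#include <linux/hardirq.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>

static DEFINE_PER_CPU(u64, cpu_hardirq_time);
static DEFINE_PER_CPU(u64, cpu_softirq_time);
static DEFINE_PER_CPU(u64, irq_start_time);

void irqtime_account_irq(struct task_struct *curr)
{
	s64 delta;

	delta = sched_clock_cpu(smp_processor_id()) -
		__this_cpu_read(irq_start_time);
	__this_cpu_add(irq_start_time, delta);

	/*
	 * Charge the elapsed slice to hardirq or softirq time. Ksoftirqd
	 * runs softirqs in task context, so its time stays process time.
	 */
	if (hardirq_count())
		__this_cpu_add(cpu_hardirq_time, delta);
	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
		__this_cpu_add(cpu_softirq_time, delta);
}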

From fd25b4c2f226de818e1d2b71e3e681d28bcaf5ba Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Tue, 13 Nov 2012 18:21:22 +0100
Subject: vtime: Remove the underscore prefix invasion

Prepending the irq-unsafe vtime APIs with underscores was actually a bad
idea, as the result is a big mess in an API namespace that is only
waiting to be further extended. Also these helpers are always called
from irq-safe callers, except kvm.

Just provide a vtime_account_system_irqsafe() for this specific case, so
that we can remove the underscore prefix from the other vtime functions.

Signed-off-by: Frederic Weisbecker
Reviewed-by: Steven Rostedt
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Thomas Gleixner
Cc: Steven Rostedt
Cc: Paul Gortmaker
Cc: Tony Luck
Cc: Fenghua Yu
Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Cc: Martin Schwidefsky
Cc: Heiko Carstens
---
 include/linux/kvm_host.h | 4 ++--
 include/linux/vtime.h    | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0e2212fe478..f17158bdd4f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -741,7 +741,7 @@ static inline void kvm_guest_enter(void)
 	 * This is running in ioctl context so we can avoid
 	 * the call to vtime_account() with its unnecessary idle check.
 	 */
-	vtime_account_system(current);
+	vtime_account_system_irqsafe(current);
 	current->flags |= PF_VCPU;
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
@@ -759,7 +759,7 @@ static inline void kvm_guest_exit(void)
 	 * This is running in ioctl context so we can avoid
 	 * the call to vtime_account() with its unnecessary idle check.
 	 */
-	vtime_account_system(current);
+	vtime_account_system_irqsafe(current);
 	current->flags &= ~PF_VCPU;
 }
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 0c2a2d30302..5ad13c325de 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -5,14 +5,14 @@ struct task_struct;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void vtime_task_switch(struct task_struct *prev);
-extern void __vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_system(struct task_struct *tsk);
-extern void __vtime_account_idle(struct task_struct *tsk);
+extern void vtime_account_system_irqsafe(struct task_struct *tsk);
+extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account(struct task_struct *tsk);
 #else
 static inline void vtime_task_switch(struct task_struct *prev) { }
-static inline void __vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
+static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
 static inline void vtime_account(struct task_struct *tsk) { }
 #endif
 
@@ -40,7 +40,7 @@ static inline void vtime_account_irq_enter(struct task_struct *tsk)
 static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
 	/* On hard|softirq exit we always account to hard|softirq cputime */
-	__vtime_account_system(tsk);
+	vtime_account_system(tsk);
 	irqtime_account_irq(tsk);
 }
--
cgit v1.2.3

From bcebdf846522056a84ba0b0cba5f5413868c9394 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Tue, 13 Nov 2012 23:51:06 +0100
Subject: vtime: Explicitly account pending user time on process tick

All vtime implementations just flush the user time on process tick.
Consolidate that in generic code by calling a user time accounting
helper. This avoids an indirect call in ia64 and prepares for also
consolidating the vtime context switch code.

Signed-off-by: Frederic Weisbecker
Reviewed-by: Steven Rostedt
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Thomas Gleixner
Cc: Steven Rostedt
Cc: Paul Gortmaker
Cc: Tony Luck
Cc: Fenghua Yu
Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Cc: Martin Schwidefsky
Cc: Heiko Carstens
---
 include/linux/kernel_stat.h | 8 ++++++++
 include/linux/vtime.h       | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 1865b1f2977..66b70780e91 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -127,7 +127,15 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
 extern void account_steal_time(cputime_t);
 extern void account_idle_time(cputime_t);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+static inline void account_process_tick(struct task_struct *tsk, int user)
+{
+	vtime_account_user(tsk);
+}
+#else
 extern void account_process_tick(struct task_struct *, int user);
+#endif
+
 extern void account_steal_ticks(unsigned long ticks);
 extern void account_idle_ticks(unsigned long ticks);
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 5ad13c325de..ae30ab58431 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -8,6 +8,7 @@ extern void vtime_task_switch(struct task_struct *prev);
 extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_system_irqsafe(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
+extern void vtime_account_user(struct task_struct *tsk);
 extern void vtime_account(struct task_struct *tsk);
 #else
 static inline void vtime_task_switch(struct task_struct *prev) { }
--
cgit v1.2.3
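
The generic side above reduces the tick to a single vtime_account_user()
call; the per-arch side then flushes whatever user time accumulated
since the last snapshot. A hedged sketch of that shape, where
arch_pending_user_time() is a hypothetical stand-in for each
architecture's own bookkeeping:

#include <linux/kernel_stat.h>
#include <linux/sched.h>

void vtime_account_user(struct task_struct *tsk)
{
	/* Hypothetical hook: user time elapsed since the last flush. */
	cputime_t delta = arch_pending_user_time(tsk);

	/* No separately scaled cputime in this sketch; pass the delta twice. */
	account_user_time(tsk, delta, delta);
}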

From e80d0a1ae8bb8fee0edd37427836f108b30f596b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 21 Nov 2012 16:26:44 +0100
Subject: cputime: Rename thread_group_times to thread_group_cputime_adjusted

We have thread_group_cputime() and thread_group_times(). The naming
doesn't provide enough information about the difference between these
two APIs.

To lower the confusion, rename thread_group_times() to
thread_group_cputime_adjusted(). This name better suggests that it's a
version of thread_group_cputime() that does some stabilization on the
raw cputime values, i.e. here: scaling on top of CFS runtime stats and
bounding the lower value for monotonicity.

Signed-off-by: Frederic Weisbecker
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Steven Rostedt
Cc: Paul Gortmaker
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e1581a029e3..e75cab5820a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1751,8 +1751,8 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
-extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
-extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
 /*
  * Per process flags
--
cgit v1.2.3

From d37f761dbd276790f70dcf73a287fde2c3464482 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Thu, 22 Nov 2012 00:58:35 +0100
Subject: cputime: Consolidate cputime adjustment code

task_cputime_adjusted() and thread_group_cputime_adjusted() essentially
share the same code. They just don't use the same source:

* The first function uses the cputime in the task struct and the
  previous adjusted snapshot that ensures monotonicity.

* The second adds the cputime of all tasks in the group and the
  previous adjusted snapshot of the whole group from the signal
  structure.

Just consolidate the common code that does the adjustment. These
functions just need to fetch the values from the appropriate source.

Signed-off-by: Frederic Weisbecker
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Steven Rostedt
Cc: Paul Gortmaker
---
 include/linux/sched.h | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e75cab5820a..5dafac36681 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -433,14 +433,29 @@ struct cpu_itimer {
 	u32 incr_error;
 };
 
+/**
+ * struct cputime - snapshot of system and user cputime
+ * @utime: time spent in user mode
+ * @stime: time spent in system mode
+ *
+ * Gathers a generic snapshot of user and system time.
+ */
+struct cputime {
+	cputime_t utime;
+	cputime_t stime;
+};
+
 /**
  * struct task_cputime - collected CPU time counts
  * @utime:		time spent in user mode, in &cputime_t units
  * @stime:		time spent in kernel mode, in &cputime_t units
  * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
  *
- * This structure groups together three kinds of CPU time that are
- * tracked for threads and thread groups.  Most things considering
+ * This is an extension of struct cputime that includes the total runtime
+ * spent by the task from the scheduler point of view.
+ *
+ * As a result, this structure groups together three kinds of CPU time
+ * that are tracked for threads and thread groups.  Most things considering
  * CPU time want to group these counts together and treat all three
  * of them in parallel.
  */
@@ -581,7 +596,7 @@ struct signal_struct {
 	cputime_t gtime;
 	cputime_t cgtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-	cputime_t prev_utime, prev_stime;
+	struct cputime prev_cputime;
 #endif
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
@@ -1340,7 +1355,7 @@ struct task_struct {
 	cputime_t utime, stime, utimescaled, stimescaled;
 	cputime_t gtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-	cputime_t prev_utime, prev_stime;
+	struct cputime prev_cputime;
 #endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time;  /* monotonic time */
--
cgit v1.2.3
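
The consolidated helper this changelog describes would take a
task_cputime sample plus the stored struct cputime snapshot, scale, and
enforce monotonicity. The sketch below mirrors that description; the
helper name and scale_utime() follow the kernel's conventions, but the
details should be treated as approximate:

static void cputime_adjust(struct task_cputime *curr, struct cputime *prev,
			   cputime_t *ut, cputime_t *st)
{
	cputime_t rtime, utime, total;

	utime = curr->utime;
	total = utime + curr->stime;

	/* Scale the nanosecond-accurate runtime by the tick-based ratio. */
	rtime = nsecs_to_cputime(curr->sum_exec_runtime);
	if (total)
		utime = scale_utime(utime, rtime, total);
	else
		utime = rtime;

	/* Bound below by the previous snapshot to keep the values monotonic. */
	prev->utime = max(prev->utime, utime);
	prev->stime = max(prev->stime, rtime - prev->utime);

	*ut = prev->utime;
	*st = prev->stime;
}

task_cputime_adjusted() then feeds it the task's own counters and
tsk->prev_cputime, while thread_group_cputime_adjusted() feeds it the
summed group counters and sig->prev_cputime.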