From 9c63a650bb100e7553d60c991ba0c5db9c743239 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 31 Oct 2012 01:29:52 -0400 Subject: [PATCH 1/8] tools/power/x86/turbostat: share kernel MSR #defines Now that turbostat is built in the kernel tree, it can share MSR #defines with the kernel. Signed-off-by: Len Brown Cc: x86@kernel.org --- arch/x86/include/asm/msr-index.h | 14 ++++++++++++++ tools/power/x86/turbostat/Makefile | 1 + tools/power/x86/turbostat/turbostat.c | 26 +++++++------------------- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 7f0edceb756..2639f816628 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -35,11 +35,14 @@ #define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_FSB_FREQ 0x000000cd +#define MSR_NHM_PLATFORM_INFO 0x000000ce #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) #define MSR_MTRRcap 0x000000fe #define MSR_IA32_BBL_CR_CTL 0x00000119 @@ -55,6 +58,8 @@ #define MSR_OFFCORE_RSP_0 0x000001a6 #define MSR_OFFCORE_RSP_1 0x000001a7 +#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad +#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 @@ -103,6 +108,15 @@ #define MSR_IA32_MC0_ADDR 0x00000402 #define MSR_IA32_MC0_MISC 0x00000403 +/* C-state Residency Counters */ +#define MSR_PKG_C3_RESIDENCY 0x000003f8 +#define MSR_PKG_C6_RESIDENCY 0x000003f9 +#define MSR_PKG_C7_RESIDENCY 0x000003fa +#define MSR_CORE_C3_RESIDENCY 0x000003fc +#define MSR_CORE_C6_RESIDENCY 0x000003fd +#define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_PKG_C2_RESIDENCY 0x0000060d + #define MSR_AMD64_MC0_MASK 0xc0010044 #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index f8564955419..51880e8467b 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -1,5 +1,6 @@ turbostat : turbostat.c CFLAGS += -Wall +CFLAGS += -I../../../../arch/x86/include/ clean : rm -f turbostat diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ea095abbe97..3c063a00f3b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -20,6 +20,7 @@ */ #define _GNU_SOURCE +#include #include #include #include @@ -35,19 +36,6 @@ #include #include -#define MSR_NEHALEM_PLATFORM_INFO 0xCE -#define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD -#define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE -#define MSR_APERF 0xE8 -#define MSR_MPERF 0xE7 -#define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ -#define MSR_PKG_C3_RESIDENCY 0x3F8 -#define MSR_PKG_C6_RESIDENCY 0x3F9 -#define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */ -#define MSR_CORE_C3_RESIDENCY 0x3FC -#define MSR_CORE_C6_RESIDENCY 0x3FD -#define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */ - char *proc_stat = "/proc/stat"; unsigned int interval_sec = 5; /* set with -i interval_sec */ unsigned int verbose; /* set with -v */ @@ -674,9 +662,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) t->tsc = rdtsc(); /* we are running on local CPU of interest */ if (has_aperf) { - if (get_msr(cpu, MSR_APERF, &t->aperf)) + if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) return -3; - if (get_msr(cpu, MSR_MPERF, &t->mperf)) + if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) return -4; } @@ -742,10 +730,10 @@ void print_verbose_header(void) if (!do_nehalem_platform_info) return; - get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); + get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); if (verbose > 1) - fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); + fprintf(stderr, "MSR_NHM_PLATFORM_INFO: 0x%llx\n", msr); ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", @@ -808,10 +796,10 @@ print_nhm_turbo_ratio_limits: if (!do_nehalem_turbo_ratio_limit) return; - get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); + get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); if (verbose > 1) - fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); + fprintf(stderr, "MSR_NHM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) From 3fc808aaa052dec7b155f3242c6c0eabf0c49127 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 31 Oct 2012 20:47:40 -0400 Subject: [PATCH 2/8] x86 power: define RAPL MSRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Run Time Average Power Limiting interface is currently model specific, present on Sandy Bridge and Ivy Bridge processors. These #defines correspond to documentation in the latest "IntelĀ® 64 and IA-32 Architectures Software Developer Manual", plus some typos in that document corrected. Signed-off-by: Len Brown Cc: x86@kernel.org --- arch/x86/include/asm/msr-index.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2639f816628..4a4abaea04d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -117,6 +117,29 @@ #define MSR_CORE_C7_RESIDENCY 0x000003fe #define MSR_PKG_C2_RESIDENCY 0x0000060d +/* Run Time Average Power Limiting (RAPL) Interface */ + +#define MSR_RAPL_POWER_UNIT 0x00000606 + +#define MSR_PKG_POWER_LIMIT 0x00000610 +#define MSR_PKG_ENERGY_STATUS 0x00000611 +#define MSR_PKG_PERF_STATUS 0x00000613 +#define MSR_PKG_POWER_INFO 0x00000614 + +#define MSR_DRAM_POWER_LIMIT 0x00000618 +#define MSR_DRAM_ENERGY_STATUS 0x00000619 +#define MSR_DRAM_PERF_STATUS 0x0000061b +#define MSR_DRAM_POWER_INFO 0x0000061c + +#define MSR_PP0_POWER_LIMIT 0x00000638 +#define MSR_PP0_ENERGY_STATUS 0x00000639 +#define MSR_PP0_POLICY 0x0000063a +#define MSR_PP0_PERF_STATUS 0x0000063b + +#define MSR_PP1_POWER_LIMIT 0x00000640 +#define MSR_PP1_ENERGY_STATUS 0x00000641 +#define MSR_PP1_POLICY 0x00000642 + #define MSR_AMD64_MC0_MASK 0xc0010044 #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) From e52966c084f9d9ea12be2ac7df801d610d4a19a5 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 8 Nov 2012 22:38:05 -0500 Subject: [PATCH 3/8] tools/power turbostat: prevent infinite loop on migration error path Turbostat assumed if it can't migrate to a CPU, then the CPU must have gone off-line and turbostat should re-initialize with the new topology. But if turbostat can not migrate because it is restricted by a cpuset, then it will fail to migrate even after re-initialization, resulting in an infinite loop. Spit out a warning when we can't migrate and endure only 2 re-initialize cycles in a row before giving up and exiting. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 3c063a00f3b..77e76b11382 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -656,8 +656,10 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int cpu = t->cpu_id; - if (cpu_migrate(cpu)) + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); return -1; + } t->tsc = rdtsc(); /* we are running on local CPU of interest */ @@ -1088,15 +1090,22 @@ int mark_cpu_present(int cpu) void turbostat_loop() { int retval; + int restarted = 0; restart: + restarted++; + retval = for_all_cpus(get_counters, EVEN_COUNTERS); if (retval < -1) { exit(retval); } else if (retval == -1) { + if (restarted > 1) { + exit(retval); + } re_initialize(); goto restart; } + restarted = 0; gettimeofday(&tv_even, (struct timezone *)NULL); while (1) { From ddac0d6872b15fc5311d44021b8898ec6720bdec Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 30 Nov 2012 01:01:40 -0500 Subject: [PATCH 4/8] tools/power turbostat: fix output buffering issue In periodic mode, turbostat writes to stdout, but users were un-able to re-direct stdout, eg. turbostat > outputfile would result in an empty outputfile. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 77e76b11382..cb031472bed 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -423,6 +423,7 @@ done: void flush_stdout() { fputs(output_buffer, stdout); + fflush(stdout); outp = output_buffer; } void flush_stderr() From 889facbee3e67dbc8eb29d8ee7fd66d33a647bfc Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 8 Nov 2012 00:48:57 -0500 Subject: [PATCH 5/8] tools/power turbostat: v3.0: monitor Watts and Temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Show power in Watts and temperature in Celsius when hardware support is present. Intel's Sandy Bridge and Ivy Bridge processor generations support RAPL (Run-Time-Average-Power-Limiting). Per the Intel SDM (IntelĀ® 64 and IA-32 Architectures Software Developer Manual) RAPL provides hardware energy counters and power control MSRs (Model Specific Registers). RAPL MSRs are designed primarily as a method to implement power capping. However, they are useful for monitoring system power whether or not power capping is used. In addition, Turbostat now shows temperature from DTS (Digital Thermal Sensor) and PTM (Package Thermal Monitor) hardware, if present. As before, turbostat reads MSRs, and never writes MSRs. New columns are present in turbostat output: The Pkg_W column shows Watts for each package (socket) in the system. On multi-socket systems, the system summary on the 1st row shows the sum for all sockets together. The Cor_W column shows Watts due to processors cores. Note that Core_W is included in Pkg_W. The optional GFX_W column shows Watts due to the graphics "un-core". Note that GFX_W is included in Pkg_W. The optional RAM_W column on server processors shows Watts due to DRAM DIMMS. As DRAM DIMMs are outside the processor package, RAM_W is not included in Pkg_W. The optional PKG_% and RAM_% columns on server processors shows the % of time in the measurement interval that RAPL power limiting is in effect on the package and on DRAM. Note that the RAPL energy counters have some limitations. First, hardware updates the counters about once every milli-second. This is fine for typical turbostat measurement intervals > 1 sec. However, when turbostat is used to measure events that approach 1ms, the counters are less useful. Second, the 32-bit energy counters are subject to wrapping. For example, a counter incrementing 15 micro-Joule units on a 130 Watt TDP server processor could (in theory) roll over in about 9 minutes. Turbostat detects and handles up to 1 counter overflow per measurement interval. But when the measurement interval exceeds the guaranteed counter range, we can't detect if more than 1 overflow occured. So in this case turbostat indicates that the results are in question by replacing the fractional part of the Watts in the output with "**": Pkg_W Cor_W GFX_W 3** 0** 0** Third, the RAPL counters are energy (Joule) counters -- they sum up weighted events in the package to estimate energy consumed. They are not analong power (Watt) meters. In practice, they tend to under-count because they don't cover every possible use of energy in the package. The accuracy of the RAPL counters will vary between product generations, and between SKU's in the same product generation, and with temperature. turbostat's -v (verbose) option now displays more power and thermal configuration information -- as shown on the turbostat.8 manual page. For example, it now displays the Package and DRAM Thermal Design Power (TDP): cpu0: MSR_PKG_POWER_INFO: 0x2f064001980410 (130 W TDP, RAPL 51 - 200 W, 0.045898 sec.) cpu0: MSR_DRAM_POWER_INFO,: 0x28025800780118 (35 W TDP, RAPL 15 - 75 W, 0.039062 sec.) cpu8: MSR_PKG_POWER_INFO: 0x2f064001980410 (130 W TDP, RAPL 51 - 200 W, 0.045898 sec.) cpu8: MSR_DRAM_POWER_INFO,: 0x28025800780118 (35 W TDP, RAPL 15 - 75 W, 0.039062 sec.) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 103 +++-- tools/power/x86/turbostat/turbostat.c | 643 +++++++++++++++++++++++++- 2 files changed, 690 insertions(+), 56 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e4d0690cccf..0d7dc2cfefb 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -11,16 +11,16 @@ turbostat \- Report processor frequency and idle statistics .RB [ Options ] .RB [ "\-i interval_sec" ] .SH DESCRIPTION -\fBturbostat \fP reports processor topology, frequency -and idle power state statistics on modern X86 processors. +\fBturbostat \fP reports processor topology, frequency, +idle power-state statistics, temperature and power on modern X86 processors. Either \fBcommand\fP is forked and statistics are printed upon its completion, or statistics are printed periodically. \fBturbostat \fP -requires that the processor +must be run on root, and +minimally requires that the processor supports an "invariant" TSC, plus the APERF and MPERF MSRs. -\fBturbostat \fP will report idle cpu power state residency -on processors that additionally support C-state residency counters. +Additional information is reported depending on hardware counter support. .SS Options The \fB-p\fP option limits output to the 1st thread in 1st core of each package. @@ -57,7 +57,15 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T \fBGHz\fP average clock rate while the CPU was in c0 state. \fBTSC\fP average GHz that the TSC ran during the entire interval. \fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. +\fBCTMP\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. +\fBPTMP\fP Degrees Celsius reported by the per-package Package Thermal Monitor. \fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. +\fBPkg_W\fP Watts consumed by the whole package. +\fBCor_W\fP Watts consumed by the core part of the package. +\fBGFX_W\fP Watts consumed by the Graphics part of the package -- available only on client processors. +\fBRAM_W\fP Watts consumed by the DRAM DIMMS -- available only on server processors. +\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. +\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. .fi .PP .SH EXAMPLE @@ -66,50 +74,73 @@ Without any parameters, turbostat prints out counters ever 5 seconds. for turbostat to fork). The first row of statistics is a summary for the entire system. -Note that the summary is a weighted average. +For residency % columns, the summary is a weighted average. +For Temperature columns, the summary is the column maximum. +For Watts columns, the summary is a system total. Subsequent rows show per-CPU statistics. .nf -[root@x980]# ./turbostat -cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 - 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61 - 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61 - 0 6 0.05 1.62 3.38 10.34 - 1 2 0.03 1.62 3.38 0.07 0.05 99.86 - 1 8 0.03 1.62 3.38 0.06 - 2 4 0.21 1.62 3.38 0.10 1.49 98.21 - 2 10 0.02 1.62 3.38 0.29 - 8 1 0.04 1.62 3.38 0.04 0.08 99.84 - 8 7 0.01 1.62 3.38 0.06 - 9 3 0.53 1.62 3.38 0.10 0.20 99.17 - 9 9 0.02 1.62 3.38 0.60 - 10 5 0.01 1.62 3.38 0.02 0.04 99.92 - 10 11 0.02 1.62 3.38 0.02 +[root@sandy]# ./turbostat +cor CPU %c0 GHz TSC %c1 %c3 %c6 %c7 CTMP PTMP %pc2 %pc3 %pc6 %pc7 Pkg_W Cor_W GFX_W + 0.06 0.80 2.29 0.11 0.00 0.00 99.83 47 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14 + 0 0 0.07 0.80 2.29 0.07 0.00 0.00 99.86 40 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14 + 0 4 0.03 0.80 2.29 0.12 + 1 1 0.04 0.80 2.29 0.25 0.01 0.00 99.71 40 + 1 5 0.16 0.80 2.29 0.13 + 2 2 0.05 0.80 2.29 0.06 0.01 0.00 99.88 40 + 2 6 0.03 0.80 2.29 0.08 + 3 3 0.05 0.80 2.29 0.08 0.00 0.00 99.87 47 + 3 7 0.04 0.84 2.29 0.09 .fi .SH SUMMARY EXAMPLE The "-s" option prints the column headers just once, and then the one line system summary for each sample interval. .nf -[root@x980]# ./turbostat -s - %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 - 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12 - 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60 - 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36 - 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90 +[root@wsm]# turbostat -S + %c0 GHz TSC %c1 %c3 %c6 CTMP %pc3 %pc6 + 1.40 2.81 3.38 10.78 43.47 44.35 42 13.67 2.09 + 1.34 2.90 3.38 11.48 58.96 28.23 41 19.89 0.15 + 1.55 2.72 3.38 26.73 37.66 34.07 42 2.53 2.80 + 1.37 2.83 3.38 16.95 60.05 21.63 42 5.76 0.20 .fi .SH VERBOSE EXAMPLE The "-v" option adds verbosity to the output: .nf -GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2) -12 * 133 = 1600 MHz max efficiency -25 * 133 = 3333 MHz TSC frequency -26 * 133 = 3467 MHz max turbo 4 active cores -26 * 133 = 3467 MHz max turbo 3 active cores -27 * 133 = 3600 MHz max turbo 2 active cores -27 * 133 = 3600 MHz max turbo 1 active cores - +[root@ivy]# turbostat -v +turbostat v3.0 November 23, 2012 - Len Brown +CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) +CPUID(6): APERF, DTS, PTM, EPB +RAPL: 851 sec. Joule Counter Range +cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 +16 * 100 = 1600 MHz max efficiency +35 * 100 = 3500 MHz TSC frequency +cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6-noret) +cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 +37 * 100 = 3700 MHz max turbo 4 active cores +38 * 100 = 3800 MHz max turbo 3 active cores +39 * 100 = 3900 MHz max turbo 2 active cores +39 * 100 = 3900 MHz max turbo 1 active cores +cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) +cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) +cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) +cpu0: MSR_PKG_POWER_LIMIT: 0x830000148268 (UNlocked) +cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) +cpu0: PKG Limit #2: ENabled (96.000000 Watts, 0.000977* sec, clamp DISabled) +cpu0: MSR_PP0_POLICY: 0 +cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) +cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) +cpu0: MSR_PP1_POLICY: 0 +cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) +cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) +cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) +cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) +cpu0: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) +cpu1: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) +cpu2: MSR_IA32_THERM_STATUS: 0x88540000 (21 C +/- 1) +cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) + ... .fi The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency available at the minimum package voltage. The \fBTSC frequency\fP is the nominal @@ -142,7 +173,7 @@ cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 10 5 1.42 3.43 3.38 2.14 30.99 65.44 10 11 0.16 2.88 3.38 3.40 .fi -Above the cycle soaker drives cpu7 up its 3.6 Ghz turbo limit +Above the cycle soaker drives cpu7 up its 3.6 GHz turbo limit while the other processors are generally in various states of idle. Note that cpu1 and cpu7 are HT siblings within core8. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index cb031472bed..ce6d46038f7 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -39,12 +39,15 @@ char *proc_stat = "/proc/stat"; unsigned int interval_sec = 5; /* set with -i interval_sec */ unsigned int verbose; /* set with -v */ +unsigned int rapl_verbose; /* set with -R */ +unsigned int thermal_verbose; /* set with -T */ unsigned int summary_only; /* set with -s */ unsigned int skip_c0; unsigned int skip_c1; unsigned int do_nhm_cstates; unsigned int do_snb_cstates; unsigned int has_aperf; +unsigned int has_epb; unsigned int units = 1000000000; /* Ghz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; @@ -62,6 +65,23 @@ unsigned int show_cpu; unsigned int show_pkg_only; unsigned int show_core_only; char *output_buffer, *outp; +unsigned int do_rapl; +unsigned int do_dts; +unsigned int do_ptm; +unsigned int tcc_activation_temp; +unsigned int tcc_activation_temp_override; +double rapl_power_units, rapl_energy_units, rapl_time_units; +double rapl_joule_counter_range; + +#define RAPL_PKG (1 << 0) +#define RAPL_CORES (1 << 1) +#define RAPL_GFX (1 << 2) +#define RAPL_DRAM (1 << 3) +#define RAPL_PKG_PERF_STATUS (1 << 4) +#define RAPL_DRAM_PERF_STATUS (1 << 5) +#define TJMAX_DEFAULT 100 + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) int aperf_mperf_unstable; int backwards_count; @@ -89,6 +109,7 @@ struct core_data { unsigned long long c3; unsigned long long c6; unsigned long long c7; + unsigned int core_temp_c; unsigned int core_id; } *core_even, *core_odd; @@ -98,6 +119,14 @@ struct pkg_data { unsigned long long pc6; unsigned long long pc7; unsigned int package_id; + unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ + unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ + unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */ + unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */ + unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ + unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ + unsigned int pkg_temp_c; + } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -235,6 +264,12 @@ void print_header(void) outp += sprintf(outp, " %%c6"); if (do_snb_cstates) outp += sprintf(outp, " %%c7"); + + if (do_dts) + outp += sprintf(outp, " CTMP"); + if (do_ptm) + outp += sprintf(outp, " PTMP"); + if (do_snb_cstates) outp += sprintf(outp, " %%pc2"); if (do_nhm_cstates) @@ -244,6 +279,19 @@ void print_header(void) if (do_snb_cstates) outp += sprintf(outp, " %%pc7"); + if (do_rapl & RAPL_PKG) + outp += sprintf(outp, " Pkg_W"); + if (do_rapl & RAPL_CORES) + outp += sprintf(outp, " Cor_W"); + if (do_rapl & RAPL_GFX) + outp += sprintf(outp, " GFX_W"); + if (do_rapl & RAPL_DRAM) + outp += sprintf(outp, " RAM_W"); + if (do_rapl & RAPL_PKG_PERF_STATUS) + outp += sprintf(outp, " PKG_%%"); + if (do_rapl & RAPL_DRAM_PERF_STATUS) + outp += sprintf(outp, " RAM_%%"); + outp += sprintf(outp, "\n"); } @@ -273,6 +321,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, fprintf(stderr, "c3: %016llX\n", c->c3); fprintf(stderr, "c6: %016llX\n", c->c6); fprintf(stderr, "c7: %016llX\n", c->c7); + fprintf(stderr, "DTS: %dC\n", c->core_temp_c); } if (p) { @@ -281,6 +330,13 @@ int dump_counters(struct thread_data *t, struct core_data *c, fprintf(stderr, "pc3: %016llX\n", p->pc3); fprintf(stderr, "pc6: %016llX\n", p->pc6); fprintf(stderr, "pc7: %016llX\n", p->pc7); + fprintf(stderr, "Joules PKG: %0X\n", p->energy_pkg); + fprintf(stderr, "Joules COR: %0X\n", p->energy_cores); + fprintf(stderr, "Joules GFX: %0X\n", p->energy_gfx); + fprintf(stderr, "Joules RAM: %0X\n", p->energy_dram); + fprintf(stderr, "Throttle PKG: %0X\n", p->rapl_pkg_perf_status); + fprintf(stderr, "Throttle RAM: %0X\n", p->rapl_dram_perf_status); + fprintf(stderr, "PTM: %dC\n", p->pkg_temp_c); } return 0; } @@ -290,14 +346,21 @@ int dump_counters(struct thread_data *t, struct core_data *c, * package: "pk" 2 columns %2d * core: "cor" 3 columns %3d * CPU: "CPU" 3 columns %3d + * Pkg_W: %6.2 + * Cor_W: %6.2 + * GFX_W: %5.2 + * RAM_W: %5.2 * GHz: "GHz" 3 columns %3.2 * TSC: "TSC" 3 columns %3.2 * percentage " %pc3" %6.2 + * Perf Status percentage: %5.2 + * "CTMP" 4 columns %4d */ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { double interval_float; + char *fmt5, *fmt6; /* if showing only 1st thread in core and this isn't one, bail out */ if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) @@ -337,7 +400,6 @@ int format_counters(struct thread_data *t, struct core_data *c, if (show_cpu) outp += sprintf(outp, " %3d", t->cpu_id); } - /* %c0 */ if (do_nhm_cstates) { if (show_pkg || show_core || show_cpu) @@ -402,10 +464,16 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_snb_cstates) outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); + if (do_dts) + outp += sprintf(outp, " %4d", c->core_temp_c); + /* print per-package data only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) goto done; + if (do_ptm) + outp += sprintf(outp, " %4d", p->pkg_temp_c); + if (do_snb_cstates) outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); if (do_nhm_cstates) @@ -414,6 +482,32 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); if (do_snb_cstates) outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); + + /* + * If measurement interval exceeds minimum RAPL Joule Counter range, + * indicate that results are suspect by printing "**" in fraction place. + */ + if (interval_float < rapl_joule_counter_range) { + fmt5 = " %5.2f"; + fmt6 = " %6.2f"; + } else { + fmt5 = " %3.0f**"; + fmt6 = " %4.0f**"; + } + + if (do_rapl & RAPL_PKG) + outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float); + if (do_rapl & RAPL_CORES) + outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float); + if (do_rapl & RAPL_GFX) + outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float); + if (do_rapl & RAPL_DRAM) + outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float); + if (do_rapl & RAPL_PKG_PERF_STATUS ) + outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); + if (do_rapl & RAPL_DRAM_PERF_STATUS ) + outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); + done: outp += sprintf(outp, "\n"); @@ -450,6 +544,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ for_all_cpus(format_counters, t, c, p); } +#define DELTA_WRAP32(new, old) \ + if (new > old) { \ + old = new - old; \ + } else { \ + old = 0x100000000 + new - old; \ + } + void delta_package(struct pkg_data *new, struct pkg_data *old) { @@ -457,6 +558,14 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pc3 = new->pc3 - old->pc3; old->pc6 = new->pc6 - old->pc6; old->pc7 = new->pc7 - old->pc7; + old->pkg_temp_c = new->pkg_temp_c; + + DELTA_WRAP32(new->energy_pkg, old->energy_pkg); + DELTA_WRAP32(new->energy_cores, old->energy_cores); + DELTA_WRAP32(new->energy_gfx, old->energy_gfx); + DELTA_WRAP32(new->energy_dram, old->energy_dram); + DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); + DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); } void @@ -465,6 +574,7 @@ delta_core(struct core_data *new, struct core_data *old) old->c3 = new->c3 - old->c3; old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; + old->core_temp_c = new->core_temp_c; } /* @@ -571,11 +681,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c3 = 0; c->c6 = 0; c->c7 = 0; + c->core_temp_c = 0; p->pc2 = 0; p->pc3 = 0; p->pc6 = 0; p->pc7 = 0; + + p->energy_pkg = 0; + p->energy_dram = 0; + p->energy_cores = 0; + p->energy_gfx = 0; + p->rapl_pkg_perf_status = 0; + p->rapl_dram_perf_status = 0; + p->pkg_temp_c = 0; } int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) @@ -596,6 +715,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.c6 += c->c6; average.cores.c7 += c->c7; + average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + /* sum per-pkg values only for 1st core in pkg */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; @@ -605,6 +726,15 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.pc6 += p->pc6; average.packages.pc7 += p->pc7; + average.packages.energy_pkg += p->energy_pkg; + average.packages.energy_dram += p->energy_dram; + average.packages.energy_cores += p->energy_cores; + average.packages.energy_gfx += p->energy_gfx; + + average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); + + average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; + average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; return 0; } /* @@ -656,6 +786,7 @@ static unsigned long long rdtsc(void) int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int cpu = t->cpu_id; + unsigned long long msr; if (cpu_migrate(cpu)) { fprintf(stderr, "Could not migrate to CPU %d\n", cpu); @@ -672,9 +803,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } if (extra_delta_offset32) { - if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32)) + if (get_msr(cpu, extra_delta_offset32, &msr)) return -5; - t->extra_delta32 &= 0xFFFFFFFF; + t->extra_delta32 = msr & 0xFFFFFFFF; } if (extra_delta_offset64) @@ -682,9 +813,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -5; if (extra_msr_offset32) { - if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32)) + if (get_msr(cpu, extra_msr_offset32, &msr)) return -5; - t->extra_msr32 &= 0xFFFFFFFF; + t->extra_msr32 = msr & 0xFFFFFFFF; } if (extra_msr_offset64) @@ -706,6 +837,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) return -8; + if (do_dts) { + if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) + return -9; + c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); + } + + /* collect package counters only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; @@ -722,6 +860,41 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) return -12; } + if (do_rapl & RAPL_PKG) { + if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) + return -13; + p->energy_pkg = msr & 0xFFFFFFFF; + } + if (do_rapl & RAPL_CORES) { + if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) + return -14; + p->energy_cores = msr & 0xFFFFFFFF; + } + if (do_rapl & RAPL_DRAM) { + if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) + return -15; + p->energy_dram = msr & 0xFFFFFFFF; + } + if (do_rapl & RAPL_GFX) { + if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr)) + return -16; + p->energy_gfx = msr & 0xFFFFFFFF; + } + if (do_rapl & RAPL_PKG_PERF_STATUS) { + if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr)) + return -16; + p->rapl_pkg_perf_status = msr & 0xFFFFFFFF; + } + if (do_rapl & RAPL_DRAM_PERF_STATUS) { + if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr)) + return -16; + p->rapl_dram_perf_status = msr & 0xFFFFFFFF; + } + if (do_ptm) { + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) + return -17; + p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); + } return 0; } @@ -735,8 +908,8 @@ void print_verbose_header(void) get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); - if (verbose > 1) - fprintf(stderr, "MSR_NHM_PLATFORM_INFO: 0x%llx\n", msr); + if (verbose) + fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", @@ -751,8 +924,8 @@ void print_verbose_header(void) get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); - if (verbose > 1) - fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); + if (verbose) + fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -795,14 +968,56 @@ void print_verbose_header(void) ratio, bclk, ratio * bclk); print_nhm_turbo_ratio_limits: + get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + +#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) + + fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); + + fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ", + (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", + (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", + (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", + (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", + (msr & (1 << 15)) ? "" : "UN", + (unsigned int)msr & 7); + + + switch(msr & 0x7) { + case 0: + fprintf(stderr, "pc0"); + break; + case 1: + fprintf(stderr, do_snb_cstates ? "pc2" : "pc0"); + break; + case 2: + fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3"); + break; + case 3: + fprintf(stderr, "pc6"); + break; + case 4: + fprintf(stderr, "pc7"); + break; + case 5: + fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid"); + break; + case 7: + fprintf(stderr, "unlimited"); + break; + default: + fprintf(stderr, "invalid"); + } + fprintf(stderr, ")\n"); if (!do_nehalem_turbo_ratio_limit) return; get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); - if (verbose > 1) - fprintf(stderr, "MSR_NHM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); + if (verbose) + fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1205,6 +1420,299 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) } } +/* + * print_epb() + * Decode the ENERGY_PERF_BIAS MSR + */ +int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + char *epb_string; + int cpu; + + if (!has_epb) + return 0; + + cpu = t->cpu_id; + + /* EPB is per-package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) + return 0; + + switch (msr & 0x7) { + case ENERGY_PERF_BIAS_PERFORMANCE: + epb_string = "performance"; + break; + case ENERGY_PERF_BIAS_NORMAL: + epb_string = "balanced"; + break; + case ENERGY_PERF_BIAS_POWERSAVE: + epb_string = "powersave"; + break; + default: + epb_string = "custom"; + break; + } + fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); + + return 0; +} + +#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ +#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ + +/* + * rapl_probe() + * + * sets do_rapl + */ +void rapl_probe(unsigned int family, unsigned int model) +{ + unsigned long long msr; + double tdp; + + if (!genuine_intel) + return; + + if (family != 6) + return; + + switch (model) { + case 0x2A: + case 0x3A: + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX; + break; + case 0x2D: + case 0x3E: + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS; + break; + default: + return; + } + + /* units on package 0, verify later other packages match */ + if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) + return; + + rapl_power_units = 1.0 / (1 << (msr & 0xF)); + rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); + rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); + + /* get TDP to determine energy counter range */ + if (get_msr(0, MSR_PKG_POWER_INFO, &msr)) + return; + + tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; + + rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; + + if (verbose) + fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range); + + return; +} + +int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + unsigned int dts; + int cpu; + + if (!(do_dts || do_ptm)) + return 0; + + cpu = t->cpu_id; + + /* DTS is per-core, no need to print for each thread */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) { + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", + cpu, msr, tcc_activation_temp - dts); + +#ifdef THERM_DEBUG + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + dts2 = (msr >> 8) & 0x7F; + fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); +#endif + } + + + if (do_dts) { + unsigned int resolution; + + if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + resolution = (msr >> 27) & 0xF; + fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", + cpu, msr, tcc_activation_temp - dts, resolution); + +#ifdef THERM_DEBUG + if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + dts2 = (msr >> 8) & 0x7F; + fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); +#endif + } + + return 0; +} + +void print_power_limit_msr(int cpu, unsigned long long msr, char *label) +{ + fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", + cpu, label, + ((msr >> 15) & 1) ? "EN" : "DIS", + ((msr >> 0) & 0x7FFF) * rapl_power_units, + (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, + (((msr >> 16) & 1) ? "EN" : "DIS")); + + return; +} + +int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + int cpu; + double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units; + + if (!do_rapl) + return 0; + + /* RAPL counters are per package, so print only for 1st thread/package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + cpu = t->cpu_id; + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) + return -1; + + local_rapl_power_units = 1.0 / (1 << (msr & 0xF)); + local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); + local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); + + if (local_rapl_power_units != rapl_power_units) + fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu); + if (local_rapl_energy_units != rapl_energy_units) + fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu); + if (local_rapl_time_units != rapl_time_units) + fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu); + + if (verbose) { + fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " + "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, + local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units); + } + if (do_rapl & RAPL_PKG) { + if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) + return -5; + + + fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", + cpu, msr, + ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + + if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) + return -9; + + fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 63) & 1 ? "": "UN"); + + print_power_limit_msr(cpu, msr, "PKG Limit #1"); + fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", + cpu, + ((msr >> 47) & 1) ? "EN" : "DIS", + ((msr >> 32) & 0x7FFF) * rapl_power_units, + (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, + ((msr >> 48) & 1) ? "EN" : "DIS"); + } + + if (do_rapl & RAPL_DRAM) { + if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) + return -6; + + + fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", + cpu, msr, + ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + + + if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) + return -9; + fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "": "UN"); + + print_power_limit_msr(cpu, msr, "DRAM Limit"); + } + if (do_rapl & RAPL_CORES) { + if (verbose) { + if (get_msr(cpu, MSR_PP0_POLICY, &msr)) + return -7; + + fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); + + if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) + return -9; + fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "": "UN"); + print_power_limit_msr(cpu, msr, "Cores Limit"); + } + } + if (do_rapl & RAPL_GFX) { + if (verbose) { + if (get_msr(cpu, MSR_PP1_POLICY, &msr)) + return -8; + + fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); + + if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) + return -9; + fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "": "UN"); + print_power_limit_msr(cpu, msr, "GFX Limit"); + } + } + return 0; +} + int is_snb(unsigned int family, unsigned int model) { @@ -1229,6 +1737,72 @@ double discover_bclk(unsigned int family, unsigned int model) return 133.33; } +/* + * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where + * the Thermal Control Circuit (TCC) activates. + * This is usually equal to tjMax. + * + * Older processors do not have this MSR, so there we guess, + * but also allow cmdline over-ride with -T. + * + * Several MSR temperature values are in units of degrees-C + * below this value, including the Digital Thermal Sensor (DTS), + * Package Thermal Management Sensor (PTM), and thermal event thresholds. + */ +int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + unsigned int target_c_local; + int cpu; + + /* tcc_activation_temp is used only for dts or ptm */ + if (!(do_dts || do_ptm)) + return 0; + + /* this is a per-package concept */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + cpu = t->cpu_id; + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (tcc_activation_temp_override != 0) { + tcc_activation_temp = tcc_activation_temp_override; + fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", + cpu, tcc_activation_temp); + return 0; + } + + /* Temperature Target MSR is Nehalem and newer only */ + if (!do_nehalem_platform_info) + goto guess; + + if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) + goto guess; + + target_c_local = (msr >> 16) & 0x7F; + + if (verbose) + fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", + cpu, msr, target_c_local); + + if (target_c_local < 85 || target_c_local > 120) + goto guess; + + tcc_activation_temp = target_c_local; + + return 0; + +guess: + tcc_activation_temp = TJMAX_DEFAULT; + fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", + cpu, tcc_activation_temp); + + return 0; +} void check_cpuid() { unsigned int eax, ebx, ecx, edx, max_level; @@ -1242,7 +1816,7 @@ void check_cpuid() genuine_intel = 1; if (verbose) - fprintf(stderr, "%.4s%.4s%.4s ", + fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", (char *)&ebx, (char *)&edx, (char *)&ecx); asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); @@ -1293,10 +1867,19 @@ void check_cpuid() asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); has_aperf = ecx & (1 << 0); - if (!has_aperf) { - fprintf(stderr, "No APERF MSR\n"); - exit(1); - } + do_dts = eax & (1 << 0); + do_ptm = eax & (1 << 6); + has_epb = ecx & (1 << 3); + + if (verbose) + fprintf(stderr, "CPUID(6): %s%s%s%s\n", + has_aperf ? "APERF" : "No APERF!", + do_dts ? ", DTS" : "", + do_ptm ? ", PTM": "", + has_epb ? ", EPB": ""); + + if (!has_aperf) + exit(-1); do_nehalem_platform_info = genuine_intel && has_invariant_tsc; do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ @@ -1305,12 +1888,15 @@ void check_cpuid() do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); + rapl_probe(family, model); + + return; } void usage() { - fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", + fprintf(stderr, "%s: [-v][-R][-T][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", progname); exit(1); } @@ -1546,6 +2132,17 @@ void turbostat_init() if (verbose) print_verbose_header(); + + if (verbose) + for_all_cpus(print_epb, ODD_COUNTERS); + + if (verbose) + for_all_cpus(print_rapl, ODD_COUNTERS); + + for_all_cpus(set_temperature_target, ODD_COUNTERS); + + if (verbose) + for_all_cpus(print_thermal, ODD_COUNTERS); } int fork_it(char **argv) @@ -1602,7 +2199,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:")) != -1) { + while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:RT:")) != -1) { switch (opt) { case 'p': show_core_only++; @@ -1634,6 +2231,12 @@ void cmdline(int argc, char **argv) case 'M': sscanf(optarg, "%x", &extra_msr_offset64); break; + case 'R': + rapl_verbose++; + break; + case 'T': + tcc_activation_temp_override = atoi(optarg); + break; default: usage(); } @@ -1644,8 +2247,8 @@ int main(int argc, char **argv) { cmdline(argc, argv); - if (verbose > 1) - fprintf(stderr, "turbostat v2.1 October 6, 2012" + if (verbose) + fprintf(stderr, "turbostat v3.0 November 23, 2012" " - Len Brown \n"); turbostat_init(); From 84764a415c707b43e751deb579a421776f190a95 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 15 Nov 2012 14:02:00 -0500 Subject: [PATCH 6/8] tools/power x86_energy_perf_policy: close /proc/stat in for_every_cpu() Instead of returning out of for_every_cpu() we should break out of the loop= which will then tidy up correctly by closing the file /proc/stat. Signed-off-by: Colin Ian King Signed-off-by: Len Brown --- tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 33c5c7ee148..40b3e5482f8 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -289,7 +289,7 @@ void for_every_cpu(void (func)(int)) "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu); if (retval != 1) - return; + break; func(cpu); } From ee0778a3015370779f603d2b6119a6ec2b1c811a Mon Sep 17 00:00:00 2001 From: Mark Asselstine Date: Tue, 9 Oct 2012 11:31:44 -0400 Subject: [PATCH 7/8] tools/power: turbostat: make Makefile a bit more capable The turbostat Makefile is pretty simple, its output is placed in the same directory as the source, the install rule has no concept of a prefix or sysroot, and you can set CC to use a specific compiler but not use the more familiar CROSS_COMPILE. By making a few minor changes these limitations are removed while leaving the default behavior matching what it used to be. Example build with these changes: make CROSS_COMPILE=i686-wrs-linux-gnu- DESTDIR=/tmp install or from the tools directory make CROSS_COMPILE=i686-wrs-linux-gnu- DESTDIR=/tmp turbostat_install Signed-off-by: Mark Asselstine Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 51880e8467b..e48ef07156c 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -1,10 +1,22 @@ +CC = $(CROSS_COMPILE)gcc +BUILD_OUTPUT := $(PWD) +PREFIX := /usr +DESTDIR := + turbostat : turbostat.c CFLAGS += -Wall CFLAGS += -I../../../../arch/x86/include/ -clean : - rm -f turbostat +%: %.c + @mkdir -p $(BUILD_OUTPUT) + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ -install : - install turbostat /usr/bin/turbostat - install turbostat.8 /usr/share/man/man8 +.PHONY : clean +clean : + @rm -f $(BUILD_OUTPUT)/turbostat + +install : turbostat + install -d $(DESTDIR)$(PREFIX)/bin + install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat + install -d $(DESTDIR)$(PREFIX)/share/man/man8 + install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 From 55f1f545f709a6023371848028a3029118855576 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Tue, 30 Oct 2012 13:38:05 -0400 Subject: [PATCH 8/8] tools: Allow tools to be installed in a user specified location When building x86_energy_perf_policy or turbostat within the confines of a packaging system such as RPM, we need to be able to have it install to the buildroot and not the root filesystem of the build machine. This adds a DESTDIR variable that when set will act as a prefix for the install location of these tools. Signed-off-by: Josh Boyer Signed-off-by: Len Brown --- tools/power/x86/x86_energy_perf_policy/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index f458237fdd7..971c9ffdcb5 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -1,8 +1,10 @@ +DESTDIR ?= + x86_energy_perf_policy : x86_energy_perf_policy.c clean : rm -f x86_energy_perf_policy install : - install x86_energy_perf_policy /usr/bin/ - install x86_energy_perf_policy.8 /usr/share/man/man8/ + install x86_energy_perf_policy ${DESTDIR}/usr/bin/ + install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/