From 6248d0602f9932a437070dda598b7973b8770384 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Mon, 1 Oct 2012 10:56:42 +0100 Subject: ARM: 7545/1: cache-l2x0: make outer_cache_fns a field of l2x0_of_data Instead of having multiple functions belonging to outer_cache and filling this structure on the fly, use a outer_cache_fns field inside l2x0_of_data and just memcopy it into outer_cache depending of the type of the l2x0 cache. For non DT case, the former code was kept. [rmk: fixed a style issue] Tested-and-Reviewed-by: Yehuda Yitschak Tested-and-Reviewed-by: Lior Amsalem Signed-off-by: Gregory CLEMENT Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 55 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 8a97e6443c6..db55d18691e 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -39,9 +39,11 @@ struct l2x0_regs l2x0_saved_regs; struct l2x0_of_data { void (*setup)(const struct device_node *, u32 *, u32 *); void (*save)(void); - void (*resume)(void); + struct outer_cache_fns outer_cache; }; +static bool of_init = false; + static inline void cache_wait_way(void __iomem *reg, unsigned long mask) { /* wait for cache operation by line or way to complete */ @@ -380,13 +382,15 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) /* Save the value for resuming. */ l2x0_saved_regs.aux_ctrl = aux; - outer_cache.inv_range = l2x0_inv_range; - outer_cache.clean_range = l2x0_clean_range; - outer_cache.flush_range = l2x0_flush_range; - outer_cache.sync = l2x0_cache_sync; - outer_cache.flush_all = l2x0_flush_all; - outer_cache.inv_all = l2x0_inv_all; - outer_cache.disable = l2x0_disable; + if (!of_init) { + outer_cache.inv_range = l2x0_inv_range; + outer_cache.clean_range = l2x0_clean_range; + outer_cache.flush_range = l2x0_flush_range; + outer_cache.sync = l2x0_cache_sync; + outer_cache.flush_all = l2x0_flush_all; + outer_cache.inv_all = l2x0_inv_all; + outer_cache.disable = l2x0_disable; + } printk(KERN_INFO "%s cache controller enabled\n", type); printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n", @@ -537,15 +541,34 @@ static void pl310_resume(void) } static const struct l2x0_of_data pl310_data = { - pl310_of_setup, - pl310_save, - pl310_resume, + .setup = pl310_of_setup, + .save = pl310_save, + .outer_cache = { + .resume = pl310_resume, + .inv_range = l2x0_inv_range, + .clean_range = l2x0_clean_range, + .flush_range = l2x0_flush_range, + .sync = l2x0_cache_sync, + .flush_all = l2x0_flush_all, + .inv_all = l2x0_inv_all, + .disable = l2x0_disable, + .set_debug = pl310_set_debug, + }, }; static const struct l2x0_of_data l2x0_data = { - l2x0_of_setup, - NULL, - l2x0_resume, + .setup = l2x0_of_setup, + .save = NULL, + .outer_cache = { + .resume = l2x0_resume, + .inv_range = l2x0_inv_range, + .clean_range = l2x0_clean_range, + .flush_range = l2x0_flush_range, + .sync = l2x0_cache_sync, + .flush_all = l2x0_flush_all, + .inv_all = l2x0_inv_all, + .disable = l2x0_disable, + }, }; static const struct of_device_id l2x0_ids[] __initconst = { @@ -585,9 +608,11 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) if (data->save) data->save(); + of_init = true; l2x0_init(l2x0_base, aux_val, aux_mask); - outer_cache.resume = data->resume; + memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache)); + return 0; } #endif -- cgit v1.2.3 From c3545236e8740ab556022f87685d18503c86e187 Mon Sep 17 
00:00:00 2001 From: Gregory CLEMENT Date: Mon, 1 Oct 2012 10:59:29 +0100 Subject: ARM: 7546/1: cache-l2x0: add an optional register to save/restore Tested-and-Reviewed-by: Yehuda Yitschak Tested-and-Reviewed-by: Lior Amsalem Signed-off-by: Gregory CLEMENT Signed-off-by: Russell King --- arch/arm/include/asm/hardware/cache-l2x0.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index c4c87bc1223..5f2c7b44fda 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -126,6 +126,7 @@ struct l2x0_regs { unsigned long filter_end; unsigned long prefetch_ctrl; unsigned long pwr_ctrl; + unsigned long ctrl; }; extern struct l2x0_regs l2x0_saved_regs; -- cgit v1.2.3 From 153cd8e839b5729358d4e5c3371e7509ee5ac96a Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 16 Oct 2012 11:54:00 +0100 Subject: ARM: 7553/1: proc-v7: Ensure correct instruction set after cpu_reset Because mov pc, never switches instruction set when executed in Thumb code, Thumb-2 kernels will silently execute the target code after cpu_reset as Thumb code, even if the passed code pointer denotes ARM (bit 0 clear). This patch uses bx instead, ensuring the correct instruction set for the target code. Thumb code in the kernel is not supported prior to ARMv7, so other CPUs are not affected. Signed-off-by: Dave Martin Acked-by: Will Deacon Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/proc-v7.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 846d279f317..42cc833aa02 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -57,7 +57,7 @@ ENTRY(cpu_v7_reset) THUMB( bic r1, r1, #1 << 30 ) @ SCTLR.TE (Thumb exceptions) mcr p15, 0, r1, c1, c0, 0 @ disable MMU isb - mov pc, r0 + bx r0 ENDPROC(cpu_v7_reset) .popsection -- cgit v1.2.3 From 871df85a592396b36d4c40b3860e8d7373626552 Mon Sep 17 00:00:00 2001 From: fwu Date: Sat, 29 Sep 2012 04:14:03 +0100 Subject: ARM: 7544/1: Add BUG_ON when hlt counter is wrongly used 1. On ARM platform, "nohlt" can be used to prevent core from idle process, returning immediately. 2. There are two interfaces, exported for other modules, named "disable_hlt" and "enable_hlt" are used to enable/disable the cpuidle mechanism by increasing/decreasing "hlt_counter". Disable_hlt and enable_hlt are paired operation, when you first call disable_hlt and then enable_hlt, the semantics are right. 3. There is no obvious constraint to prevent user(driver/module) code to prevent the case that enable_hlt is ahead of disable_hlt, which is a fatal operation on kernel state change from user, and there is no any WARNING or notification if the case happens in current kernel code. This patch aims to report BUG when the case happens, just like what the kernel do when enable_irq is ahead of disable_irq. 
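As an illustration (not part of the patch), here is a minimal sketch of how a driver is expected to keep the two calls balanced. Only disable_hlt()/enable_hlt() are real kernel symbols (exported from arch/arm/kernel/process.c, as the diff below shows); the surrounding driver function is hypothetical:

    /* hypothetical driver code -- only disable_hlt()/enable_hlt() are real */
    extern void disable_hlt(void);   /* exported by arch/arm/kernel/process.c */
    extern void enable_hlt(void);

    static void example_do_low_latency_work(void)
    {
            disable_hlt();          /* hlt_counter++: idle path skips WFI/hlt */
            /* ... time-critical work that must not be interrupted by idle ... */
            enable_hlt();           /* hlt_counter--: pairs with the call above */
    }

An enable_hlt() without a matching earlier disable_hlt() drives hlt_counter below zero; with this patch that case now hits BUG_ON() instead of going unnoticed.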
Link: https://patchwork.kernel.org/patch/1527881/ Signed-off-by: fwu Signed-off-by: YiLu Mao Signed-off-by: Ning Jiang Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/process.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 90084a6de35..45fd05186a3 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -70,6 +70,7 @@ EXPORT_SYMBOL(disable_hlt); void enable_hlt(void) { hlt_counter--; + BUG_ON(hlt_counter < 0); } EXPORT_SYMBOL(enable_hlt); -- cgit v1.2.3 From 07c9249f1fa90cc8189bed44c0bcece664596a72 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 16 Oct 2012 18:50:00 +0100 Subject: ARM: 7554/1: VIC: use irq_domain_add_simple() Instead of allocating descriptors on-the-fly for the device tree initialization case, use irq_domain_add_simple() which will take care of this if you pass negative as the first_irq. Alter the signature of __vic_init() to pass the first_irq as signed so this works as expected. Switching the VIC to use irq_domain_add_simple() also has the upside of displaying the same WARNING when you boot with pre-allocated descriptors on systems using SPARSE_IRQ but yet not using device tree. Cc: Grant Likely Acked-by: Rob Herring Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/common/vic.c | 18 ++++++------------ arch/arm/include/asm/hardware/vic.h | 2 +- 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c index e0d538803cc..4fd5d980edd 100644 --- a/arch/arm/common/vic.c +++ b/arch/arm/common/vic.c @@ -218,7 +218,7 @@ static void __init vic_register(void __iomem *base, unsigned int irq, v->resume_sources = resume_sources; v->irq = irq; vic_id++; - v->domain = irq_domain_add_legacy(node, fls(valid_sources), irq, 0, + v->domain = irq_domain_add_simple(node, fls(valid_sources), irq, &vic_irqdomain_ops, v); } @@ -350,7 +350,7 @@ static void __init vic_init_st(void __iomem *base, unsigned int irq_start, vic_register(base, irq_start, vic_sources, 0, node); } -void __init __vic_init(void __iomem *base, unsigned int irq_start, +void __init __vic_init(void __iomem *base, int irq_start, u32 vic_sources, u32 resume_sources, struct device_node *node) { @@ -416,18 +416,12 @@ int __init vic_of_init(struct device_node *node, struct device_node *parent) if (WARN_ON(!regs)) return -EIO; - irq_base = irq_alloc_descs(-1, 0, 32, numa_node_id()); - if (WARN_ON(irq_base < 0)) - goto out_unmap; - - __vic_init(regs, irq_base, ~0, ~0, node); + /* + * Passing -1 as first IRQ makes the simple domain allocate descriptors + */ + __vic_init(regs, -1, ~0, ~0, node); return 0; - - out_unmap: - iounmap(regs); - - return -EIO; } #endif /* CONFIG OF */ diff --git a/arch/arm/include/asm/hardware/vic.h b/arch/arm/include/asm/hardware/vic.h index e14af1a1a32..2bebad36fc8 100644 --- a/arch/arm/include/asm/hardware/vic.h +++ b/arch/arm/include/asm/hardware/vic.h @@ -47,7 +47,7 @@ struct device_node; struct pt_regs; -void __vic_init(void __iomem *base, unsigned int irq_start, u32 vic_sources, +void __vic_init(void __iomem *base, int irq_start, u32 vic_sources, u32 resume_sources, struct device_node *node); void vic_init(void __iomem *base, unsigned int irq_start, u32 vic_sources, u32 resume_sources); int vic_of_init(struct device_node *node, struct device_node *parent); -- cgit v1.2.3 From 2577cf246233b1e4e38576f28a5ec05c9c6a6c2a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 22 Oct 2012 
10:18:06 +0100 Subject: ARM: 7561/1: SMP_TWD: use clk_prepare_enable() A minor code refactoring saving a few lines by merging prepare() and enable() calls. Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/kernel/smp_twd.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index e1f906989bb..780b0570636 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -248,17 +248,9 @@ static struct clk *twd_get_clock(void) return clk; } - err = clk_prepare(clk); + err = clk_prepare_enable(clk); if (err) { - pr_err("smp_twd: clock failed to prepare: %d\n", err); - clk_put(clk); - return ERR_PTR(err); - } - - err = clk_enable(clk); - if (err) { - pr_err("smp_twd: clock failed to enable: %d\n", err); - clk_unprepare(clk); + pr_err("smp_twd: clock failed to prepare+enable: %d\n", err); clk_put(clk); return ERR_PTR(err); } -- cgit v1.2.3 From a68becd1dcda55b467dcabaff136cadc10abb761 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 23 Oct 2012 08:29:48 +0100 Subject: ARM: 7563/1: SMP_TWD: make setup()/stop() reentrant It has been brought to my knowledge that the .setup()/.stop() function pair in the SMP TWD is going to be called from atomic contexts for CPUs coming and going, and then the clk_prepare()/clk_unprepare() calls cannot be called on subsequent .setup()/.stop() iterations. This is however just the tip of an iceberg as the function pair is not designed to be reentrant at all. This change makes the SMP_TWD clock .setup()/.stop() pair reentrant by splitting the .setup() function in three parts: - One COMMON part that is executed the first time the first CPU in the TWD cluster is initialized. This will fetch the TWD clk for the cluster and prepare+enable it. If no clk is available it will calibrate the rate instead. - One part that is executed the FIRST TIME a certain CPU is brought on-line. This initializes and sets up the clock event for a certain CPU. - One part that is executed on every subsequent .setup() call. This will re-initialize the clock event. This is augmented to call the clk_enable()/clk_disable() pair properly. Cc: Shawn Guo Reported-by: Peter Chen Reviewed-by: Santosh Shilimkar Tested-by: Shawn Guo Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/kernel/smp_twd.c | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 780b0570636..a2e74375945 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -31,6 +31,8 @@ static void __iomem *twd_base; static struct clk *twd_clk; static unsigned long twd_timer_rate; +static bool common_setup_called; +static DEFINE_PER_CPU(bool, percpu_setup_called); static struct clock_event_device __percpu **twd_evt; static int twd_ppi; @@ -264,15 +266,45 @@ static struct clk *twd_get_clock(void) static int __cpuinit twd_timer_setup(struct clock_event_device *clk) { struct clock_event_device **this_cpu_clk; + int cpu = smp_processor_id(); - if (!twd_clk) + /* + * If the basic setup for this CPU has been done before don't + * bother with the below. 
+ */ + if (per_cpu(percpu_setup_called, cpu)) { + __raw_writel(0, twd_base + TWD_TIMER_CONTROL); + clockevents_register_device(*__this_cpu_ptr(twd_evt)); + enable_percpu_irq(clk->irq, 0); + return 0; + } + per_cpu(percpu_setup_called, cpu) = true; + + /* + * This stuff only need to be done once for the entire TWD cluster + * during the runtime of the system. + */ + if (!common_setup_called) { twd_clk = twd_get_clock(); - if (!IS_ERR_OR_NULL(twd_clk)) - twd_timer_rate = clk_get_rate(twd_clk); - else - twd_calibrate_rate(); + /* + * We use IS_ERR_OR_NULL() here, because if the clock stubs + * are active we will get a valid clk reference which is + * however NULL and will return the rate 0. In that case we + * need to calibrate the rate instead. + */ + if (!IS_ERR_OR_NULL(twd_clk)) + twd_timer_rate = clk_get_rate(twd_clk); + else + twd_calibrate_rate(); + + common_setup_called = true; + } + /* + * The following is done once per CPU the first time .setup() is + * called. + */ __raw_writel(0, twd_base + TWD_TIMER_CONTROL); clk->name = "local_timer"; -- cgit v1.2.3 From ee951c630c5ce5108f8014ce1c9d738b5bbfea60 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 29 Oct 2012 19:19:34 +0100 Subject: ARM: 7568/1: Sort exception table at compile time Add the ARM machine identifier to sortextable and select the config option so that we can sort the exception table at compile time. sortextable relies on a section named __ex_table existing in the vmlinux, but ARM's linker script places the exception table in the data section. Give the exception table its own section so that sortextable can find it. This allows us to skip the sorting step during boot. Cc: David Daney Signed-off-by: Stephen Boyd Tested-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/kernel/vmlinux.lds.S | 19 +++++++++---------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 73067efd484..208414c0506 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,6 +5,7 @@ config ARM select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_WANT_IPC_PARSE_VERSION + select BUILDTIME_EXTABLE_SORT if MMU select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN select GENERIC_ATOMIC64 if (CPU_V6 || !CPU_32v6K || !AEABI) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 36ff15bbfdd..b9f38e388b4 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -114,6 +114,15 @@ SECTIONS RO_DATA(PAGE_SIZE) + . = ALIGN(4); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; +#ifdef CONFIG_MMU + *(__ex_table) +#endif + __stop___ex_table = .; + } + #ifdef CONFIG_ARM_UNWIND /* * Stack unwinding tables @@ -219,16 +228,6 @@ SECTIONS CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) READ_MOSTLY_DATA(L1_CACHE_BYTES) - /* - * The exception fixup table (might need resorting at runtime) - */ - . = ALIGN(4); - __start___ex_table = .; -#ifdef CONFIG_MMU - *(__ex_table) -#endif - __stop___ex_table = .; - /* * and the usual data section */ -- cgit v1.2.3 From b5466f8728527a05a493cc4abe9e6f034a1bbaab Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 Jun 2012 14:47:31 +0100 Subject: ARM: mm: remove IPI broadcasting on ASID rollover ASIDs are allocated to MMU contexts based on a rolling counter. 
This means that after 255 allocations we must invalidate all existing ASIDs via an expensive IPI mechanism to synchronise all of the online CPUs and ensure that all tasks execute with an ASID from the new generation. This patch changes the rollover behaviour so that we rely instead on the hardware broadcasting of the TLB invalidation to avoid the IPI calls. This works by keeping track of the active ASID on each core, which is then reserved in the case of a rollover so that currently scheduled tasks can continue to run. For cores without hardware TLB broadcasting, we keep track of pending flushes in a cpumask, so cores can flush their local TLB before scheduling a new mm. Reviewed-by: Catalin Marinas Tested-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/mmu.h | 11 +-- arch/arm/include/asm/mmu_context.h | 82 +--------------- arch/arm/mm/context.c | 186 +++++++++++++++++-------------------- 3 files changed, 93 insertions(+), 186 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 14965658a92..5b53b53ab5c 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -5,18 +5,15 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID - unsigned int id; - raw_spinlock_t id_lock; + u64 id; #endif unsigned int kvm_seq; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID -#define ASID(mm) ((mm)->context.id & 255) - -/* init_mm.context.id_lock should be initialized. */ -#define INIT_MM_CONTEXT(name) \ - .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), +#define ASID_BITS 8 +#define ASID_MASK ((~0ULL) << ASID_BITS) +#define ASID(mm) ((mm)->context.id & ~ASID_MASK) #else #define ASID(mm) (0) #endif diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 0306bc642c0..a64f61cb23d 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -24,84 +24,8 @@ void __check_kvm_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID -/* - * On ARMv6, we have the following structure in the Context ID: - * - * 31 7 0 - * +-------------------------+-----------+ - * | process ID | ASID | - * +-------------------------+-----------+ - * | context ID | - * +-------------------------------------+ - * - * The ASID is used to tag entries in the CPU caches and TLBs. - * The context ID is used by debuggers and trace logic, and - * should be unique within all running processes. - */ -#define ASID_BITS 8 -#define ASID_MASK ((~0) << ASID_BITS) -#define ASID_FIRST_VERSION (1 << ASID_BITS) - -extern unsigned int cpu_last_asid; - -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void __new_context(struct mm_struct *mm); -void cpu_set_reserved_ttbr0(void); - -static inline void switch_new_context(struct mm_struct *mm) -{ - unsigned long flags; - - __new_context(mm); - - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); -} - -static inline void check_and_switch_context(struct mm_struct *mm, - struct task_struct *tsk) -{ - if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) - __check_kvm_seq(mm); - - /* - * Required during context switch to avoid speculative page table - * walking with the wrong TTBR. - */ - cpu_set_reserved_ttbr0(); - - if (!((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) - /* - * The ASID is from the current generation, just switch to the - * new pgd. This condition is only true for calls from - * context_switch() and interrupts are already disabled. 
- */ - cpu_switch_mm(mm->pgd, mm); - else if (irqs_disabled()) - /* - * Defer the new ASID allocation until after the context - * switch critical region since __new_context() cannot be - * called with interrupts disabled (it sends IPIs). - */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); - else - /* - * That is a direct call to switch_mm() or activate_mm() with - * interrupts enabled and a new context. - */ - switch_new_context(mm); -} - -#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0) - -#define finish_arch_post_lock_switch \ - finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) -{ - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) - switch_new_context(current->mm); -} +void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); +#define init_new_context(tsk,mm) ({ mm->context.id = 0; }) #else /* !CONFIG_CPU_HAS_ASID */ @@ -143,6 +67,7 @@ static inline void finish_arch_post_lock_switch(void) #endif /* CONFIG_CPU_HAS_ASID */ #define destroy_context(mm) do { } while(0) +#define activate_mm(prev,next) switch_mm(prev, next, NULL) /* * This is called when "tsk" is about to enter lazy TLB mode. @@ -186,6 +111,5 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, } #define deactivate_mm(tsk,mm) do { } while (0) -#define activate_mm(prev,next) switch_mm(prev, next, NULL) #endif diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 4e07eec1270..3172781a8e2 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -2,6 +2,9 @@ * linux/arch/arm/mm/context.c * * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,14 +17,35 @@ #include #include +#include #include #include +/* + * On ARMv6, we have the following structure in the Context ID: + * + * 31 7 0 + * +-------------------------+-----------+ + * | process ID | ASID | + * +-------------------------+-----------+ + * | context ID | + * +-------------------------------------+ + * + * The ASID is used to tag entries in the CPU caches and TLBs. + * The context ID is used by debuggers and trace logic, and + * should be unique within all running processes. + */ +#define ASID_FIRST_VERSION (1ULL << ASID_BITS) + static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -unsigned int cpu_last_asid = ASID_FIRST_VERSION; +static u64 cpu_last_asid = ASID_FIRST_VERSION; + +static DEFINE_PER_CPU(u64, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending; #ifdef CONFIG_ARM_LPAE -void cpu_set_reserved_ttbr0(void) +static void cpu_set_reserved_ttbr0(void) { unsigned long ttbl = __pa(swapper_pg_dir); unsigned long ttbh = 0; @@ -37,7 +61,7 @@ void cpu_set_reserved_ttbr0(void) isb(); } #else -void cpu_set_reserved_ttbr0(void) +static void cpu_set_reserved_ttbr0(void) { u32 ttb; /* Copy TTBR1 into TTBR0 */ @@ -84,124 +108,86 @@ static int __init contextidr_notifier_init(void) arch_initcall(contextidr_notifier_init); #endif -/* - * We fork()ed a process, and we need a new context for the child - * to run in. 
- */ -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) +static void flush_context(unsigned int cpu) { - mm->context.id = 0; - raw_spin_lock_init(&mm->context.id_lock); -} + int i; -static void flush_context(void) -{ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - if (icache_is_vivt_asid_tagged()) { + /* Update the list of reserved ASIDs. */ + per_cpu(active_asids, cpu) = 0; + for_each_possible_cpu(i) + per_cpu(reserved_asids, i) = per_cpu(active_asids, i); + + /* Queue a TLB invalidate and flush the I-cache if necessary. */ + if (!tlb_ops_need_broadcast()) + cpumask_set_cpu(cpu, &tlb_flush_pending); + else + cpumask_setall(&tlb_flush_pending); + + if (icache_is_vivt_asid_tagged()) __flush_icache_all(); - dsb(); - } } -#ifdef CONFIG_SMP +static int is_reserved_asid(u64 asid, u64 mask) +{ + int cpu; + for_each_possible_cpu(cpu) + if ((per_cpu(reserved_asids, cpu) & mask) == (asid & mask)) + return 1; + return 0; +} -static void set_mm_context(struct mm_struct *mm, unsigned int asid) +static void new_context(struct mm_struct *mm, unsigned int cpu) { - unsigned long flags; + u64 asid = mm->context.id; - /* - * Locking needed for multi-threaded applications where the - * same mm->context.id could be set from different CPUs during - * the broadcast. This function is also called via IPI so the - * mm->context.id_lock has to be IRQ-safe. - */ - raw_spin_lock_irqsave(&mm->context.id_lock, flags); - if (likely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) { + if (asid != 0 && is_reserved_asid(asid, ULLONG_MAX)) { /* - * Old version of ASID found. Set the new one and - * reset mm_cpumask(mm). + * Our current ASID was active during a rollover, we can + * continue to use it and this was just a false alarm. */ - mm->context.id = asid; + asid = (cpu_last_asid & ASID_MASK) | (asid & ~ASID_MASK); + } else { + /* + * Allocate a free ASID. If we can't find one, take a + * note of the currently active ASIDs and mark the TLBs + * as requiring flushes. + */ + do { + asid = ++cpu_last_asid; + if ((asid & ~ASID_MASK) == 0) + flush_context(cpu); + } while (is_reserved_asid(asid, ~ASID_MASK)); cpumask_clear(mm_cpumask(mm)); } - raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); - /* - * Set the mm_cpumask(mm) bit for the current CPU. - */ - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + mm->context.id = asid; } -/* - * Reset the ASID on the current CPU. This function call is broadcast - * from the CPU handling the ASID rollover and holding cpu_asid_lock. - */ -static void reset_context(void *info) +void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) { - unsigned int asid; + unsigned long flags; unsigned int cpu = smp_processor_id(); - struct mm_struct *mm = current->active_mm; - - smp_rmb(); - asid = cpu_last_asid + cpu + 1; - - flush_context(); - set_mm_context(mm, asid); - - /* set the new ASID */ - cpu_switch_mm(mm->pgd, mm); -} - -#else -static inline void set_mm_context(struct mm_struct *mm, unsigned int asid) -{ - mm->context.id = asid; - cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); -} + if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) + __check_kvm_seq(mm); -#endif - -void __new_context(struct mm_struct *mm) -{ - unsigned int asid; - - raw_spin_lock(&cpu_asid_lock); -#ifdef CONFIG_SMP /* - * Check the ASID again, in case the change was broadcast from - * another CPU before we acquired the lock. + * Required during context switch to avoid speculative page table + * walking with the wrong TTBR. 
*/ - if (unlikely(((mm->context.id ^ cpu_last_asid) >> ASID_BITS) == 0)) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - raw_spin_unlock(&cpu_asid_lock); - return; - } -#endif - /* - * At this point, it is guaranteed that the current mm (with - * an old ASID) isn't active on any other CPU since the ASIDs - * are changed simultaneously via IPI. - */ - asid = ++cpu_last_asid; - if (asid == 0) - asid = cpu_last_asid = ASID_FIRST_VERSION; + cpu_set_reserved_ttbr0(); - /* - * If we've used up all our ASIDs, we need - * to start a new version and flush the TLB. - */ - if (unlikely((asid & ~ASID_MASK) == 0)) { - asid = cpu_last_asid + smp_processor_id() + 1; - flush_context(); -#ifdef CONFIG_SMP - smp_wmb(); - smp_call_function(reset_context, NULL, 1); -#endif - cpu_last_asid += NR_CPUS; - } + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + /* Check that our ASID belongs to the current generation. */ + if ((mm->context.id ^ cpu_last_asid) >> ASID_BITS) + new_context(mm, cpu); - set_mm_context(mm, asid); - raw_spin_unlock(&cpu_asid_lock); + *this_cpu_ptr(&active_asids) = mm->context.id; + cpumask_set_cpu(cpu, mm_cpumask(mm)); + + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) + local_flush_tlb_all(); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + + cpu_switch_mm(mm->pgd, mm); } -- cgit v1.2.3 From 4b883160835faf38c9356f0885cf491a1e661e88 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 27 Jul 2012 12:31:35 +0100 Subject: ARM: mm: avoid taking ASID spinlock on fastpath When scheduling a new mm, we take a spinlock so that we can: 1. Safely allocate a new ASID, if required 2. Update our active_asids field without worrying about parallel updates to reserved_asids 3. Ensure that we flush our local TLB, if required However, this has the nasty affect of serialising context-switch across all CPUs in the system. The usual (fast) case is where the next mm has a valid ASID for the current generation. In such a scenario, we can avoid taking the lock and instead use atomic64_xchg to update the active_asids variable for the current CPU. If a rollover occurs on another CPU (which would take the lock), when copying the active_asids into the reserved_asids another atomic64_xchg is used to replace each active_asids with 0. The fast path can then detect this case and fall back to spinning on the lock. Tested-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/mm/context.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 3172781a8e2..5ac09e8b403 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -38,9 +38,9 @@ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -static u64 cpu_last_asid = ASID_FIRST_VERSION; +static atomic64_t cpu_last_asid = ATOMIC64_INIT(ASID_FIRST_VERSION); -static DEFINE_PER_CPU(u64, active_asids); +static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; @@ -113,9 +113,10 @@ static void flush_context(unsigned int cpu) int i; /* Update the list of reserved ASIDs. */ - per_cpu(active_asids, cpu) = 0; for_each_possible_cpu(i) - per_cpu(reserved_asids, i) = per_cpu(active_asids, i); + per_cpu(reserved_asids, i) = + atomic64_xchg(&per_cpu(active_asids, i), 0); + per_cpu(reserved_asids, cpu) = 0; /* Queue a TLB invalidate and flush the I-cache if necessary. 
*/ if (!tlb_ops_need_broadcast()) @@ -145,7 +146,8 @@ static void new_context(struct mm_struct *mm, unsigned int cpu) * Our current ASID was active during a rollover, we can * continue to use it and this was just a false alarm. */ - asid = (cpu_last_asid & ASID_MASK) | (asid & ~ASID_MASK); + asid = (atomic64_read(&cpu_last_asid) & ASID_MASK) | \ + (asid & ~ASID_MASK); } else { /* * Allocate a free ASID. If we can't find one, take a @@ -153,7 +155,7 @@ static void new_context(struct mm_struct *mm, unsigned int cpu) * as requiring flushes. */ do { - asid = ++cpu_last_asid; + asid = atomic64_inc_return(&cpu_last_asid); if ((asid & ~ASID_MASK) == 0) flush_context(cpu); } while (is_reserved_asid(asid, ~ASID_MASK)); @@ -177,17 +179,22 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) */ cpu_set_reserved_ttbr0(); + if (!((mm->context.id ^ atomic64_read(&cpu_last_asid)) >> ASID_BITS) + && atomic64_xchg(&per_cpu(active_asids, cpu), mm->context.id)) + goto switch_mm_fastpath; + raw_spin_lock_irqsave(&cpu_asid_lock, flags); /* Check that our ASID belongs to the current generation. */ - if ((mm->context.id ^ cpu_last_asid) >> ASID_BITS) + if ((mm->context.id ^ atomic64_read(&cpu_last_asid)) >> ASID_BITS) new_context(mm, cpu); - *this_cpu_ptr(&active_asids) = mm->context.id; + atomic64_set(&per_cpu(active_asids, cpu), mm->context.id); cpumask_set_cpu(cpu, mm_cpumask(mm)); if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) local_flush_tlb_all(); raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +switch_mm_fastpath: cpu_switch_mm(mm->pgd, mm); } -- cgit v1.2.3 From bf51bb82ccd9a74e9702d06107b23e54b27a5707 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 Aug 2012 14:57:49 +0100 Subject: ARM: mm: use bitmap operations when allocating new ASIDs When allocating a new ASID, we must take care not to re-assign a reserved ASID-value to a new mm. This requires us to check each candidate ASID against those currently reserved by other cores before assigning a new ASID to the current mm. This patch improves the ASID allocation algorithm by using a bitmap-based approach. Rather than iterating over the reserved ASID array for each candidate ASID, we simply find the first zero bit, ensuring that those indices corresponding to reserved ASIDs are set when flushing during a rollover event. Tested-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/mm/context.c | 54 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 5ac09e8b403..7a27d7363be 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -36,9 +36,14 @@ * should be unique within all running processes. */ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) +#define NUM_USER_ASIDS (ASID_FIRST_VERSION - 1) + +#define ASID_TO_IDX(asid) ((asid & ~ASID_MASK) - 1) +#define IDX_TO_ASID(idx) ((idx + 1) & ~ASID_MASK) static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -static atomic64_t cpu_last_asid = ATOMIC64_INIT(ASID_FIRST_VERSION); +static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); +static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); @@ -111,12 +116,19 @@ arch_initcall(contextidr_notifier_init); static void flush_context(unsigned int cpu) { int i; - - /* Update the list of reserved ASIDs. 
*/ - for_each_possible_cpu(i) - per_cpu(reserved_asids, i) = - atomic64_xchg(&per_cpu(active_asids, i), 0); - per_cpu(reserved_asids, cpu) = 0; + u64 asid; + + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + for_each_possible_cpu(i) { + if (i == cpu) { + asid = 0; + } else { + asid = atomic64_xchg(&per_cpu(active_asids, i), 0); + __set_bit(ASID_TO_IDX(asid), asid_map); + } + per_cpu(reserved_asids, i) = asid; + } /* Queue a TLB invalidate and flush the I-cache if necessary. */ if (!tlb_ops_need_broadcast()) @@ -128,11 +140,11 @@ static void flush_context(unsigned int cpu) __flush_icache_all(); } -static int is_reserved_asid(u64 asid, u64 mask) +static int is_reserved_asid(u64 asid) { int cpu; for_each_possible_cpu(cpu) - if ((per_cpu(reserved_asids, cpu) & mask) == (asid & mask)) + if (per_cpu(reserved_asids, cpu) == asid) return 1; return 0; } @@ -140,25 +152,29 @@ static int is_reserved_asid(u64 asid, u64 mask) static void new_context(struct mm_struct *mm, unsigned int cpu) { u64 asid = mm->context.id; + u64 generation = atomic64_read(&asid_generation); - if (asid != 0 && is_reserved_asid(asid, ULLONG_MAX)) { + if (asid != 0 && is_reserved_asid(asid)) { /* * Our current ASID was active during a rollover, we can * continue to use it and this was just a false alarm. */ - asid = (atomic64_read(&cpu_last_asid) & ASID_MASK) | \ - (asid & ~ASID_MASK); + asid = generation | (asid & ~ASID_MASK); } else { /* * Allocate a free ASID. If we can't find one, take a * note of the currently active ASIDs and mark the TLBs * as requiring flushes. */ - do { - asid = atomic64_inc_return(&cpu_last_asid); - if ((asid & ~ASID_MASK) == 0) - flush_context(cpu); - } while (is_reserved_asid(asid, ~ASID_MASK)); + asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + if (asid == NUM_USER_ASIDS) { + generation = atomic64_add_return(ASID_FIRST_VERSION, + &asid_generation); + flush_context(cpu); + asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + } + __set_bit(asid, asid_map); + asid = generation | IDX_TO_ASID(asid); cpumask_clear(mm_cpumask(mm)); } @@ -179,13 +195,13 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) */ cpu_set_reserved_ttbr0(); - if (!((mm->context.id ^ atomic64_read(&cpu_last_asid)) >> ASID_BITS) + if (!((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) && atomic64_xchg(&per_cpu(active_asids, cpu), mm->context.id)) goto switch_mm_fastpath; raw_spin_lock_irqsave(&cpu_asid_lock, flags); /* Check that our ASID belongs to the current generation. */ - if ((mm->context.id ^ atomic64_read(&cpu_last_asid)) >> ASID_BITS) + if ((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS) new_context(mm, cpu); atomic64_set(&per_cpu(active_asids, cpu), mm->context.id); -- cgit v1.2.3 From b8db6b886a1fecd6a5b1d13b190f3149247305ef Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 6 Nov 2012 01:58:07 +0100 Subject: ARM: 7547/4: cache-l2x0: add support for Aurora L2 cache ctrl Aurora Cache Controller was designed to be compatible with the ARM L2 Cache Controller. It comes with some difference or improvement such as: - no cache id part number available through hardware (need to get it by the DT). - always write through mode available. - two flavors of the controller outer cache and system cache (meaning maintenance operations on L1 are broadcasted to the L2 and L2 performs the same operation). 
- in outer cache mode, the cache maintenance operations are improved and can be done on a range inside a page and are not limited to a cache line. Tested-and-Reviewed-by: Lior Amsalem Signed-off-by: Gregory CLEMENT Signed-off-by: Yehuda Yitschak Reviewed-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/hardware/cache-l2x0.h | 4 + arch/arm/mm/cache-aurora-l2.h | 55 +++++++ arch/arm/mm/cache-l2x0.c | 223 +++++++++++++++++++++++++++-- 3 files changed, 269 insertions(+), 13 deletions(-) create mode 100644 arch/arm/mm/cache-aurora-l2.h (limited to 'arch') diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index 5f2c7b44fda..3b2c40b5bfa 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -102,6 +102,10 @@ #define L2X0_ADDR_FILTER_EN 1 +#define L2X0_CTRL_EN 1 + +#define L2X0_WAY_SIZE_SHIFT 3 + #ifndef __ASSEMBLY__ extern void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask); #if defined(CONFIG_CACHE_L2X0) && defined(CONFIG_OF) diff --git a/arch/arm/mm/cache-aurora-l2.h b/arch/arm/mm/cache-aurora-l2.h new file mode 100644 index 00000000000..c8612476983 --- /dev/null +++ b/arch/arm/mm/cache-aurora-l2.h @@ -0,0 +1,55 @@ +/* + * AURORA shared L2 cache controller support + * + * Copyright (C) 2012 Marvell + * + * Yehuda Yitschak + * Gregory CLEMENT + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __ASM_ARM_HARDWARE_AURORA_L2_H +#define __ASM_ARM_HARDWARE_AURORA_L2_H + +#define AURORA_SYNC_REG 0x700 +#define AURORA_RANGE_BASE_ADDR_REG 0x720 +#define AURORA_FLUSH_PHY_ADDR_REG 0x7f0 +#define AURORA_INVAL_RANGE_REG 0x774 +#define AURORA_CLEAN_RANGE_REG 0x7b4 +#define AURORA_FLUSH_RANGE_REG 0x7f4 + +#define AURORA_ACR_REPLACEMENT_OFFSET 27 +#define AURORA_ACR_REPLACEMENT_MASK \ + (0x3 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_WAYRR \ + (0 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_LFSR \ + (1 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU \ + (3 << AURORA_ACR_REPLACEMENT_OFFSET) + +#define AURORA_ACR_FORCE_WRITE_POLICY_OFFSET 0 +#define AURORA_ACR_FORCE_WRITE_POLICY_MASK \ + (0x3 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_POLICY_DIS \ + (0 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_BACK_POLICY \ + (1 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_THRO_POLICY \ + (2 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) + +#define MAX_RANGE_SIZE 1024 + +#define AURORA_WAY_SIZE_SHIFT 2 + +#define AURORA_CTRL_FW 0x100 + +/* chose a number outside L2X0_CACHE_ID_PART_MASK to be sure to make + * the distinction between a number coming from hardware and a number + * coming from the device tree */ +#define AURORA_CACHE_ID 0x100 + +#endif /* __ASM_ARM_HARDWARE_AURORA_L2_H */ diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index db55d18691e..6911b8b2745 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -25,6 +25,7 @@ #include #include +#include "cache-aurora-l2.h" #define CACHE_LINE_SIZE 32 @@ -34,6 +35,10 @@ static u32 l2x0_way_mask; /* Bitmask of active ways */ static u32 l2x0_size; static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; +/* Aurora don't have the cache ID register available, so we have to + * pass it 
though the device tree */ +static u32 cache_id_part_number_from_dt; + struct l2x0_regs l2x0_saved_regs; struct l2x0_of_data { @@ -170,7 +175,7 @@ static void l2x0_inv_all(void) /* invalidate all ways */ raw_spin_lock_irqsave(&l2x0_lock, flags); /* Invalidating when L2 is enabled is a nono */ - BUG_ON(readl(l2x0_base + L2X0_CTRL) & 1); + BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN); writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); cache_sync(); @@ -294,11 +299,18 @@ static void l2x0_unlock(u32 cache_id) int lockregs; int i; - if (cache_id == L2X0_CACHE_ID_PART_L310) + switch (cache_id) { + case L2X0_CACHE_ID_PART_L310: lockregs = 8; - else + break; + case AURORA_CACHE_ID: + lockregs = 4; + break; + default: /* L210 and unknown types */ lockregs = 1; + break; + } for (i = 0; i < lockregs; i++) { writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE + @@ -314,18 +326,22 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) u32 cache_id; u32 way_size = 0; int ways; + int way_size_shift = L2X0_WAY_SIZE_SHIFT; const char *type; l2x0_base = base; - - cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); + if (cache_id_part_number_from_dt) + cache_id = cache_id_part_number_from_dt; + else + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID) + & L2X0_CACHE_ID_PART_MASK; aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; /* Determine the number of ways */ - switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + switch (cache_id) { case L2X0_CACHE_ID_PART_L310: if (aux & (1 << 16)) ways = 16; @@ -342,6 +358,14 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) ways = (aux >> 13) & 0xf; type = "L210"; break; + + case AURORA_CACHE_ID: + sync_reg_offset = AURORA_SYNC_REG; + ways = (aux >> 13) & 0xf; + ways = 2 << ((ways + 1) >> 2); + way_size_shift = AURORA_WAY_SIZE_SHIFT; + type = "Aurora"; + break; default: /* Assume unknown chips have 8 ways */ ways = 8; @@ -355,7 +379,8 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) * L2 cache Size = Way size * Number of ways */ way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17; - way_size = 1 << (way_size + 3); + way_size = 1 << (way_size + way_size_shift); + l2x0_size = ways * way_size * SZ_1K; /* @@ -363,7 +388,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) * If you are booting from non-secure mode * accessing the below registers will fault. */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { /* Make sure that I&D is not locked down when starting */ l2x0_unlock(cache_id); @@ -373,7 +398,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) l2x0_inv_all(); /* enable L2X0 */ - writel_relaxed(1, l2x0_base + L2X0_CTRL); + writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL); } /* Re-read it in case some bits are reserved. */ @@ -398,6 +423,100 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) } #ifdef CONFIG_OF +static int l2_wt_override; + +/* + * Note that the end addresses passed to Linux primitives are + * noninclusive, while the hardware cache range operations use + * inclusive start and end addresses. + */ +static unsigned long calc_range_end(unsigned long start, unsigned long end) +{ + /* + * Limit the number of cache lines processed at once, + * since cache range operations stall the CPU pipeline + * until completion. 
+ */ + if (end > start + MAX_RANGE_SIZE) + end = start + MAX_RANGE_SIZE; + + /* + * Cache range operations can't straddle a page boundary. + */ + if (end > PAGE_ALIGN(start+1)) + end = PAGE_ALIGN(start+1); + + return end; +} + +/* + * Make sure 'start' and 'end' reference the same page, as L2 is PIPT + * and range operations only do a TLB lookup on the start address. + */ +static void aurora_pa_range(unsigned long start, unsigned long end, + unsigned long offset) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + writel(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG); + writel(end, l2x0_base + offset); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + cache_sync(); +} + +static void aurora_inv_range(unsigned long start, unsigned long end) +{ + /* + * round start and end adresses up to cache line size + */ + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < end) { + unsigned long range_end = calc_range_end(start, end); + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_INVAL_RANGE_REG); + start = range_end; + } +} + +static void aurora_clean_range(unsigned long start, unsigned long end) +{ + /* + * If L2 is forced to WT, the L2 will always be clean and we + * don't need to do anything here. + */ + if (!l2_wt_override) { + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_CLEAN_RANGE_REG); + start = range_end; + } + } +} + +static void aurora_flush_range(unsigned long start, unsigned long end) +{ + if (!l2_wt_override) { + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_FLUSH_RANGE_REG); + start = range_end; + } + } +} + static void __init l2x0_of_setup(const struct device_node *np, u32 *aux_val, u32 *aux_mask) { @@ -495,9 +614,15 @@ static void __init pl310_save(void) } } +static void aurora_save(void) +{ + l2x0_saved_regs.ctrl = readl_relaxed(l2x0_base + L2X0_CTRL); + l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +} + static void l2x0_resume(void) { - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { /* restore aux ctrl and enable l2 */ l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID)); @@ -506,7 +631,7 @@ static void l2x0_resume(void) l2x0_inv_all(); - writel_relaxed(1, l2x0_base + L2X0_CTRL); + writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL); } } @@ -514,7 +639,7 @@ static void pl310_resume(void) { u32 l2x0_revision; - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { /* restore pl310 setup */ writel_relaxed(l2x0_saved_regs.tag_latency, l2x0_base + L2X0_TAG_LATENCY_CTRL); @@ -540,6 +665,46 @@ static void pl310_resume(void) l2x0_resume(); } +static void aurora_resume(void) +{ + if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { + writel(l2x0_saved_regs.aux_ctrl, l2x0_base + L2X0_AUX_CTRL); + writel(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL); + } +} + +static void __init aurora_broadcast_l2_commands(void) +{ + __u32 u; + /* Enable Broadcasting of cache commands to L2*/ + __asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u)); + u |= AURORA_CTRL_FW; /* 
Set the FW bit */ + __asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u)); + isb(); +} + +static void __init aurora_of_setup(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; + u32 mask = AURORA_ACR_REPLACEMENT_MASK; + + of_property_read_u32(np, "cache-id-part", + &cache_id_part_number_from_dt); + + /* Determine and save the write policy */ + l2_wt_override = of_property_read_bool(np, "wt-override"); + + if (l2_wt_override) { + val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; + mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + static const struct l2x0_of_data pl310_data = { .setup = pl310_of_setup, .save = pl310_save, @@ -571,10 +736,37 @@ static const struct l2x0_of_data l2x0_data = { }, }; +static const struct l2x0_of_data aurora_with_outer_data = { + .setup = aurora_of_setup, + .save = aurora_save, + .outer_cache = { + .resume = aurora_resume, + .inv_range = aurora_inv_range, + .clean_range = aurora_clean_range, + .flush_range = aurora_flush_range, + .sync = l2x0_cache_sync, + .flush_all = l2x0_flush_all, + .inv_all = l2x0_inv_all, + .disable = l2x0_disable, + }, +}; + +static const struct l2x0_of_data aurora_no_outer_data = { + .setup = aurora_of_setup, + .save = aurora_save, + .outer_cache = { + .resume = aurora_resume, + }, +}; + static const struct of_device_id l2x0_ids[] __initconst = { { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, { .compatible = "arm,l210-cache", .data = (void *)&l2x0_data }, + { .compatible = "marvell,aurora-system-cache", + .data = (void *)&aurora_no_outer_data}, + { .compatible = "marvell,aurora-outer-cache", + .data = (void *)&aurora_with_outer_data}, {} }; @@ -600,9 +792,14 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) data = of_match_node(l2x0_ids, np)->data; /* L2 configuration can only be changed if the cache is disabled */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { if (data->setup) data->setup(np, &aux_val, &aux_mask); + + /* For aurora cache in no outer mode select the + * correct mode using the coprocessor*/ + if (data == &aurora_no_outer_data) + aurora_broadcast_l2_commands(); } if (data->save) -- cgit v1.2.3 From 946c59a08a2497303750c0fee4367ca32009155c Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 8 Nov 2012 10:48:30 +0000 Subject: ARM: vic: fix build warning caused by previous commit 07c9249f1f (ARM: 7554/1: VIC: use irq_domain_add_simple()) removed the use of 'irq_base' but did not remove the variable itself, which causes: arch/arm/common/vic.c: In function 'vic_of_init': arch/arm/common/vic.c:410:6: warning: unused variable 'irq_base' Remove this now unused variable. 
Signed-off-by: Russell King --- arch/arm/common/vic.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c index 4fd5d980edd..e4df17ca90c 100644 --- a/arch/arm/common/vic.c +++ b/arch/arm/common/vic.c @@ -407,7 +407,6 @@ void __init vic_init(void __iomem *base, unsigned int irq_start, int __init vic_of_init(struct device_node *node, struct device_node *parent) { void __iomem *regs; - int irq_base; if (WARN(parent, "non-root VICs are not supported")) return -EINVAL; -- cgit v1.2.3 From e50c54189f7c6211a99539156e3978474f0b1a0b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 13 Sep 2012 16:40:46 +0100 Subject: ARM: perf: add guest vs host discrimination Add minimal guest support to perf, so it can distinguish whether the PMU interrupt was in the host or the guest, as well as collecting some very basic information (guest PC, user vs kernel mode). This is not feature complete though, as it doesn't support backtracing in the guest. Based on the x86 implementation, tested with KVM/ARM. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/perf_event.h | 5 +++++ arch/arm/kernel/perf_event.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 625cd621a43..00416edecea 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -21,4 +21,9 @@ #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xFFFF +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + #endif /* __ARM_PERF_EVENT_H__ */ diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 53c0304b734..f8406af0327 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -576,6 +576,10 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct frame_tail __user *tail; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } tail = (struct frame_tail __user *)regs->ARM_fp - 1; @@ -603,9 +607,41 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stackframe fr; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + fr.fp = regs->ARM_fp; fr.sp = regs->ARM_sp; fr.lr = regs->ARM_lr; fr.pc = regs->ARM_pc; walk_stackframe(&fr, callchain_trace, entry); } + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + return perf_guest_cbs->get_guest_ip(); + + return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + int misc = 0; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_cbs->is_user_mode()) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + return misc; +} -- cgit v1.2.3 From 513c99ce4e64245be1f83f56039ec4891b451955 Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Tue, 31 Jul 2012 10:11:23 +0100 Subject: ARM: perf: allocate CPU PMU dynamically at probe time Supporting multiple, heterogeneous CPU PMUs requires 
us to allocate the arm_pmu structures dynamically as the devices are probed. This patch removes the static structure definitions for each CPU PMU type and instead passes pointers to the PMU-specific init functions. Signed-off-by: Sudeep KarkadaNagesha Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 47 ++++++++++------ arch/arm/kernel/perf_event_v6.c | 72 ++++++++++++------------ arch/arm/kernel/perf_event_v7.c | 106 +++++++++++++++++++----------------- arch/arm/kernel/perf_event_xscale.c | 72 ++++++++++++------------ 4 files changed, 153 insertions(+), 144 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 8d7d8d4de9d..3863fd405fa 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -195,13 +196,13 @@ static struct platform_device_id __devinitdata cpu_pmu_plat_device_ids[] = { /* * CPU PMU identification and probing. */ -static struct arm_pmu *__devinit probe_current_pmu(void) +static int __devinit probe_current_pmu(struct arm_pmu *pmu) { - struct arm_pmu *pmu = NULL; int cpu = get_cpu(); unsigned long cpuid = read_cpuid_id(); unsigned long implementor = (cpuid & 0xFF000000) >> 24; unsigned long part_number = (cpuid & 0xFFF0); + int ret = -ENODEV; pr_info("probing PMU on CPU %d\n", cpu); @@ -211,25 +212,25 @@ static struct arm_pmu *__devinit probe_current_pmu(void) case 0xB360: /* ARM1136 */ case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ - pmu = armv6pmu_init(); + ret = armv6pmu_init(pmu); break; case 0xB020: /* ARM11mpcore */ - pmu = armv6mpcore_pmu_init(); + ret = armv6mpcore_pmu_init(pmu); break; case 0xC080: /* Cortex-A8 */ - pmu = armv7_a8_pmu_init(); + ret = armv7_a8_pmu_init(pmu); break; case 0xC090: /* Cortex-A9 */ - pmu = armv7_a9_pmu_init(); + ret = armv7_a9_pmu_init(pmu); break; case 0xC050: /* Cortex-A5 */ - pmu = armv7_a5_pmu_init(); + ret = armv7_a5_pmu_init(pmu); break; case 0xC0F0: /* Cortex-A15 */ - pmu = armv7_a15_pmu_init(); + ret = armv7_a15_pmu_init(pmu); break; case 0xC070: /* Cortex-A7 */ - pmu = armv7_a7_pmu_init(); + ret = armv7_a7_pmu_init(pmu); break; } /* Intel CPUs [xscale]. 
*/ @@ -237,39 +238,51 @@ static struct arm_pmu *__devinit probe_current_pmu(void) part_number = (cpuid >> 13) & 0x7; switch (part_number) { case 1: - pmu = xscale1pmu_init(); + ret = xscale1pmu_init(pmu); break; case 2: - pmu = xscale2pmu_init(); + ret = xscale2pmu_init(pmu); break; } } put_cpu(); - return pmu; + return ret; } static int __devinit cpu_pmu_device_probe(struct platform_device *pdev) { const struct of_device_id *of_id; - struct arm_pmu *(*init_fn)(void); + int (*init_fn)(struct arm_pmu *); struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; if (cpu_pmu) { pr_info("attempt to register multiple PMU devices!"); return -ENOSPC; } + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!"); + return -ENOMEM; + } + if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { init_fn = of_id->data; - cpu_pmu = init_fn(); + ret = init_fn(pmu); } else { - cpu_pmu = probe_current_pmu(); + ret = probe_current_pmu(pmu); } - if (!cpu_pmu) - return -ENODEV; + if (ret) { + pr_info("failed to register PMU devices!"); + kfree(pmu); + return ret; + } + cpu_pmu = pmu; cpu_pmu->plat_device = pdev; cpu_pmu_init(cpu_pmu); register_cpu_notifier(&cpu_pmu_hotplug_notifier); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 6ccc0797174..3908cb4e556 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -649,24 +649,22 @@ static int armv6_map_event(struct perf_event *event) &armv6_perf_cache_map, 0xFF); } -static struct arm_pmu armv6pmu = { - .name = "v6", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .map_event = armv6_map_event, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -static struct arm_pmu *__devinit armv6pmu_init(void) +static int __devinit armv6pmu_init(struct arm_pmu *cpu_pmu) { - return &armv6pmu; + cpu_pmu->name = "v6"; + cpu_pmu->handle_irq = armv6pmu_handle_irq; + cpu_pmu->enable = armv6pmu_enable_event; + cpu_pmu->disable = armv6pmu_disable_event; + cpu_pmu->read_counter = armv6pmu_read_counter; + cpu_pmu->write_counter = armv6pmu_write_counter; + cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->start = armv6pmu_start; + cpu_pmu->stop = armv6pmu_stop; + cpu_pmu->map_event = armv6_map_event; + cpu_pmu->num_events = 3; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; } /* @@ -683,33 +681,31 @@ static int armv6mpcore_map_event(struct perf_event *event) &armv6mpcore_perf_cache_map, 0xFF); } -static struct arm_pmu armv6mpcore_pmu = { - .name = "v6mpcore", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6mpcore_pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .map_event = armv6mpcore_map_event, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -static struct arm_pmu *__devinit armv6mpcore_pmu_init(void) +static int __devinit armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) { - return &armv6mpcore_pmu; + cpu_pmu->name = "v6mpcore"; + cpu_pmu->handle_irq = armv6pmu_handle_irq; + cpu_pmu->enable = armv6pmu_enable_event; + cpu_pmu->disable = 
armv6mpcore_pmu_disable_event; + cpu_pmu->read_counter = armv6pmu_read_counter; + cpu_pmu->write_counter = armv6pmu_write_counter; + cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->start = armv6pmu_start; + cpu_pmu->stop = armv6pmu_stop; + cpu_pmu->map_event = armv6mpcore_map_event; + cpu_pmu->num_events = 3; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; } #else -static struct arm_pmu *__devinit armv6pmu_init(void) +static int armv6pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit armv6mpcore_pmu_init(void) +static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } #endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index bd4b090ebcf..b189403f30e 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -18,8 +18,6 @@ #ifdef CONFIG_CPU_V7 -static struct arm_pmu armv7pmu; - /* * Common ARMv7 event types * @@ -1014,7 +1012,7 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) * We only need to set the event for the cycle counter if we * have the ability to perform event filtering. */ - if (armv7pmu.set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER) + if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER) armv7_pmnc_write_evtsel(idx, hwc->config_base); /* @@ -1232,17 +1230,18 @@ static int armv7_a7_map_event(struct perf_event *event) &armv7_a7_perf_cache_map, 0xFF); } -static struct arm_pmu armv7pmu = { - .handle_irq = armv7pmu_handle_irq, - .enable = armv7pmu_enable_event, - .disable = armv7pmu_disable_event, - .read_counter = armv7pmu_read_counter, - .write_counter = armv7pmu_write_counter, - .get_event_idx = armv7pmu_get_event_idx, - .start = armv7pmu_start, - .stop = armv7pmu_stop, - .reset = armv7pmu_reset, - .max_period = (1LLU << 32) - 1, +static void armv7pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->handle_irq = armv7pmu_handle_irq; + cpu_pmu->enable = armv7pmu_enable_event; + cpu_pmu->disable = armv7pmu_disable_event; + cpu_pmu->read_counter = armv7pmu_read_counter; + cpu_pmu->write_counter = armv7pmu_write_counter; + cpu_pmu->get_event_idx = armv7pmu_get_event_idx; + cpu_pmu->start = armv7pmu_start; + cpu_pmu->stop = armv7pmu_stop; + cpu_pmu->reset = armv7pmu_reset; + cpu_pmu->max_period = (1LLU << 32) - 1; }; static u32 __devinit armv7_read_num_pmnc_events(void) @@ -1256,70 +1255,75 @@ static u32 __devinit armv7_read_num_pmnc_events(void) return nb_cnt + 1; } -static struct arm_pmu *__devinit armv7_a8_pmu_init(void) +static int __devinit armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) { - armv7pmu.name = "ARMv7 Cortex-A8"; - armv7pmu.map_event = armv7_a8_map_event; - armv7pmu.num_events = armv7_read_num_pmnc_events(); - return &armv7pmu; + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A8"; + cpu_pmu->map_event = armv7_a8_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + return 0; } -static struct arm_pmu *__devinit armv7_a9_pmu_init(void) +static int __devinit armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) { - armv7pmu.name = "ARMv7 Cortex-A9"; - armv7pmu.map_event = armv7_a9_map_event; - armv7pmu.num_events = armv7_read_num_pmnc_events(); - return &armv7pmu; + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A9"; + cpu_pmu->map_event = armv7_a9_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + return 0; } -static struct arm_pmu *__devinit armv7_a5_pmu_init(void) +static int __devinit 
armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) { - armv7pmu.name = "ARMv7 Cortex-A5"; - armv7pmu.map_event = armv7_a5_map_event; - armv7pmu.num_events = armv7_read_num_pmnc_events(); - return &armv7pmu; + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A5"; + cpu_pmu->map_event = armv7_a5_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + return 0; } -static struct arm_pmu *__devinit armv7_a15_pmu_init(void) +static int __devinit armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) { - armv7pmu.name = "ARMv7 Cortex-A15"; - armv7pmu.map_event = armv7_a15_map_event; - armv7pmu.num_events = armv7_read_num_pmnc_events(); - armv7pmu.set_event_filter = armv7pmu_set_event_filter; - return &armv7pmu; + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A15"; + cpu_pmu->map_event = armv7_a15_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + return 0; } -static struct arm_pmu *__devinit armv7_a7_pmu_init(void) +static int __devinit armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) { - armv7pmu.name = "ARMv7 Cortex-A7"; - armv7pmu.map_event = armv7_a7_map_event; - armv7pmu.num_events = armv7_read_num_pmnc_events(); - armv7pmu.set_event_filter = armv7pmu_set_event_filter; - return &armv7pmu; + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A7"; + cpu_pmu->map_event = armv7_a7_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + return 0; } #else -static struct arm_pmu *__devinit armv7_a8_pmu_init(void) +static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit armv7_a9_pmu_init(void) +static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit armv7_a5_pmu_init(void) +static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit armv7_a15_pmu_init(void) +static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit armv7_a7_pmu_init(void) +static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } #endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 426e19f380a..131ede6c2fd 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -434,24 +434,22 @@ static int xscale_map_event(struct perf_event *event) &xscale_perf_cache_map, 0xFF); } -static struct arm_pmu xscale1pmu = { - .name = "xscale1", - .handle_irq = xscale1pmu_handle_irq, - .enable = xscale1pmu_enable_event, - .disable = xscale1pmu_disable_event, - .read_counter = xscale1pmu_read_counter, - .write_counter = xscale1pmu_write_counter, - .get_event_idx = xscale1pmu_get_event_idx, - .start = xscale1pmu_start, - .stop = xscale1pmu_stop, - .map_event = xscale_map_event, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -static struct arm_pmu *__devinit xscale1pmu_init(void) +static int __devinit xscale1pmu_init(struct arm_pmu *cpu_pmu) { - return &xscale1pmu; + cpu_pmu->name = "xscale1"; + cpu_pmu->handle_irq = xscale1pmu_handle_irq; + cpu_pmu->enable = xscale1pmu_enable_event; + cpu_pmu->disable = xscale1pmu_disable_event; + cpu_pmu->read_counter = xscale1pmu_read_counter; + cpu_pmu->write_counter = xscale1pmu_write_counter; + cpu_pmu->get_event_idx = 
xscale1pmu_get_event_idx; + cpu_pmu->start = xscale1pmu_start; + cpu_pmu->stop = xscale1pmu_stop; + cpu_pmu->map_event = xscale_map_event; + cpu_pmu->num_events = 3; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; } #define XSCALE2_OVERFLOWED_MASK 0x01f @@ -801,33 +799,31 @@ xscale2pmu_write_counter(int counter, u32 val) } } -static struct arm_pmu xscale2pmu = { - .name = "xscale2", - .handle_irq = xscale2pmu_handle_irq, - .enable = xscale2pmu_enable_event, - .disable = xscale2pmu_disable_event, - .read_counter = xscale2pmu_read_counter, - .write_counter = xscale2pmu_write_counter, - .get_event_idx = xscale2pmu_get_event_idx, - .start = xscale2pmu_start, - .stop = xscale2pmu_stop, - .map_event = xscale_map_event, - .num_events = 5, - .max_period = (1LLU << 32) - 1, -}; - -static struct arm_pmu *__devinit xscale2pmu_init(void) +static int __devinit xscale2pmu_init(struct arm_pmu *cpu_pmu) { - return &xscale2pmu; + cpu_pmu->name = "xscale2"; + cpu_pmu->handle_irq = xscale2pmu_handle_irq; + cpu_pmu->enable = xscale2pmu_enable_event; + cpu_pmu->disable = xscale2pmu_disable_event; + cpu_pmu->read_counter = xscale2pmu_read_counter; + cpu_pmu->write_counter = xscale2pmu_write_counter; + cpu_pmu->get_event_idx = xscale2pmu_get_event_idx; + cpu_pmu->start = xscale2pmu_start; + cpu_pmu->stop = xscale2pmu_stop; + cpu_pmu->map_event = xscale_map_event; + cpu_pmu->num_events = 5; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; } #else -static struct arm_pmu *__devinit xscale1pmu_init(void) +static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } -static struct arm_pmu *__devinit xscale2pmu_init(void) +static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu) { - return NULL; + return -ENODEV; } #endif /* CONFIG_CPU_XSCALE */ -- cgit v1.2.3 From ed6f2a522398c26559f4da23a80aa6195e6284c7 Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Mon, 30 Jul 2012 12:00:02 +0100 Subject: ARM: perf: consistently use struct perf_event in arm_pmu functions The arm_pmu functions have wildly varied parameters which can often be derived from struct perf_event. This patch changes the arm_pmu function prototypes so that struct perf_event pointers are passed in preference to fields that can be derived from the event. 
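For illustration only, the shape of the converted callbacks looks roughly as follows (a sketch, not part of the diff below; the function name is made up and the body is elided):

	static void some_pmu_enable_event(struct perf_event *event)
	{
		struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
		struct hw_perf_event *hwc = &event->hw;
		int idx = hwc->idx;	/* index formerly passed as a parameter */

		/* program the counter selected by idx for this event,
		   using cpu_pmu and hwc->config_base as before */
	}

The PMU, the hw_perf_event and the counter index are all recovered from the event itself, so the per-callback parameter lists collapse to a single struct perf_event pointer.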
Signed-off-by: Sudeep KarkadaNagesha Signed-off-by: Will Deacon --- arch/arm/include/asm/pmu.h | 26 +++++------- arch/arm/kernel/perf_event.c | 44 +++++++++---------- arch/arm/kernel/perf_event_cpu.c | 8 ++-- arch/arm/kernel/perf_event_v6.c | 54 ++++++++++++----------- arch/arm/kernel/perf_event_v7.c | 46 +++++++++++++------- arch/arm/kernel/perf_event_xscale.c | 85 +++++++++++++++++++++---------------- 6 files changed, 142 insertions(+), 121 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index a26170dce02..a209a384dbc 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -67,19 +67,19 @@ struct arm_pmu { cpumask_t active_irqs; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); - void (*enable)(struct hw_perf_event *evt, int idx); - void (*disable)(struct hw_perf_event *evt, int idx); + void (*enable)(struct perf_event *event); + void (*disable)(struct perf_event *event); int (*get_event_idx)(struct pmu_hw_events *hw_events, - struct hw_perf_event *hwc); + struct perf_event *event); int (*set_event_filter)(struct hw_perf_event *evt, struct perf_event_attr *attr); - u32 (*read_counter)(int idx); - void (*write_counter)(int idx, u32 val); - void (*start)(void); - void (*stop)(void); + u32 (*read_counter)(struct perf_event *event); + void (*write_counter)(struct perf_event *event, u32 val); + void (*start)(struct arm_pmu *); + void (*stop)(struct arm_pmu *); void (*reset)(void *); - int (*request_irq)(irq_handler_t handler); - void (*free_irq)(void); + int (*request_irq)(struct arm_pmu *, irq_handler_t handler); + void (*free_irq)(struct arm_pmu *); int (*map_event)(struct perf_event *event); int num_events; atomic_t active_events; @@ -95,13 +95,9 @@ extern const struct dev_pm_ops armpmu_dev_pm_ops; int armpmu_register(struct arm_pmu *armpmu, char *name, int type); -u64 armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); +u64 armpmu_event_update(struct perf_event *event); -int armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); +int armpmu_event_set_period(struct perf_event *event); int armpmu_map_event(struct perf_event *event, const unsigned (*event_map)[PERF_COUNT_HW_MAX], diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index f8406af0327..1cfa3f35713 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -86,12 +86,10 @@ armpmu_map_event(struct perf_event *event, return -ENOENT; } -int -armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) +int armpmu_event_set_period(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0; @@ -119,24 +117,22 @@ armpmu_event_set_period(struct perf_event *event, local64_set(&hwc->prev_count, (u64)-left); - armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); + armpmu->write_counter(event, (u64)(-left) & 0xffffffff); perf_event_update_userpage(event); return ret; } -u64 -armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) +u64 armpmu_event_update(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u64 delta, prev_raw_count, new_raw_count; again: prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = armpmu->read_counter(idx); + new_raw_count = 
armpmu->read_counter(event); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -159,7 +155,7 @@ armpmu_read(struct perf_event *event) if (hwc->idx < 0) return; - armpmu_event_update(event, hwc, hwc->idx); + armpmu_event_update(event); } static void @@ -173,14 +169,13 @@ armpmu_stop(struct perf_event *event, int flags) * PERF_EF_UPDATE, see comments in armpmu_start(). */ if (!(hwc->state & PERF_HES_STOPPED)) { - armpmu->disable(hwc, hwc->idx); - armpmu_event_update(event, hwc, hwc->idx); + armpmu->disable(event); + armpmu_event_update(event); hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; } } -static void -armpmu_start(struct perf_event *event, int flags) +static void armpmu_start(struct perf_event *event, int flags) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -200,8 +195,8 @@ armpmu_start(struct perf_event *event, int flags) * get an interrupt too soon or *way* too late if the overflow has * happened since disabling. */ - armpmu_event_set_period(event, hwc, hwc->idx); - armpmu->enable(hwc, hwc->idx); + armpmu_event_set_period(event); + armpmu->enable(event); } static void @@ -233,7 +228,7 @@ armpmu_add(struct perf_event *event, int flags) perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ - idx = armpmu->get_event_idx(hw_events, hwc); + idx = armpmu->get_event_idx(hw_events, event); if (idx < 0) { err = idx; goto out; @@ -244,7 +239,7 @@ armpmu_add(struct perf_event *event, int flags) * sure it is disabled. */ event->hw.idx = idx; - armpmu->disable(hwc, idx); + armpmu->disable(event); hw_events->events[idx] = event; hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; @@ -264,13 +259,12 @@ validate_event(struct pmu_hw_events *hw_events, struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event fake_event = event->hw; struct pmu *leader_pmu = event->group_leader->pmu; if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) return 1; - return armpmu->get_event_idx(hw_events, &fake_event) >= 0; + return armpmu->get_event_idx(hw_events, event) >= 0; } static int @@ -316,7 +310,7 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) static void armpmu_release_hardware(struct arm_pmu *armpmu) { - armpmu->free_irq(); + armpmu->free_irq(armpmu); pm_runtime_put_sync(&armpmu->plat_device->dev); } @@ -330,7 +324,7 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) return -ENODEV; pm_runtime_get_sync(&pmu_device->dev); - err = armpmu->request_irq(armpmu_dispatch_irq); + err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); if (err) { armpmu_release_hardware(armpmu); return err; @@ -465,13 +459,13 @@ static void armpmu_enable(struct pmu *pmu) int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); if (enabled) - armpmu->start(); + armpmu->start(armpmu); } static void armpmu_disable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); - armpmu->stop(); + armpmu->stop(armpmu); } #ifdef CONFIG_PM_RUNTIME diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 3863fd405fa..02244faa539 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -71,7 +71,7 @@ static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) return &__get_cpu_var(cpu_hw_events); } -static void cpu_pmu_free_irq(void) +static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) { int i, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; @@ 
-87,7 +87,7 @@ static void cpu_pmu_free_irq(void) } } -static int cpu_pmu_request_irq(irq_handler_t handler) +static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) { int i, err, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; @@ -148,7 +148,7 @@ static void __devinit cpu_pmu_init(struct arm_pmu *cpu_pmu) /* Ensure the PMU has sane values out of reset. */ if (cpu_pmu && cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, NULL, 1); + on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); } /* @@ -164,7 +164,7 @@ static int __cpuinit cpu_pmu_notify(struct notifier_block *b, return NOTIFY_DONE; if (cpu_pmu && cpu_pmu->reset) - cpu_pmu->reset(NULL); + cpu_pmu->reset(cpu_pmu); return NOTIFY_OK; } diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 3908cb4e556..f3e22ff8b6a 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -401,9 +401,10 @@ armv6_pmcr_counter_has_overflowed(unsigned long pmcr, return ret; } -static inline u32 -armv6pmu_read_counter(int counter) +static inline u32 armv6pmu_read_counter(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; unsigned long value = 0; if (ARMV6_CYCLE_COUNTER == counter) @@ -418,10 +419,11 @@ armv6pmu_read_counter(int counter) return value; } -static inline void -armv6pmu_write_counter(int counter, - u32 value) +static inline void armv6pmu_write_counter(struct perf_event *event, u32 value) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + if (ARMV6_CYCLE_COUNTER == counter) asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); else if (ARMV6_COUNTER0 == counter) @@ -432,12 +434,13 @@ armv6pmu_write_counter(int counter, WARN_ONCE(1, "invalid counter number (%d)\n", counter); } -static void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) +static void armv6pmu_enable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { mask = 0; @@ -473,7 +476,8 @@ armv6pmu_handle_irq(int irq_num, { unsigned long pmcr = armv6_pmcr_read(); struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -489,7 +493,6 @@ armv6pmu_handle_irq(int irq_num, */ armv6_pmcr_write(pmcr); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -506,13 +509,13 @@ armv6pmu_handle_irq(int irq_num, continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } /* @@ -527,8 +530,7 @@ armv6pmu_handle_irq(int irq_num, return IRQ_HANDLED; } -static void -armv6pmu_start(void) +static void armv6pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -540,8 +542,7 @@ armv6pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -armv6pmu_stop(void) +static void armv6pmu_stop(struct 
arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -555,10 +556,11 @@ armv6pmu_stop(void) static int armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { + if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) { if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) return -EAGAIN; @@ -579,12 +581,13 @@ armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, } } -static void -armv6pmu_disable_event(struct hw_perf_event *hwc, - int idx) +static void armv6pmu_disable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { mask = ARMV6_PMCR_CCOUNT_IEN; @@ -613,12 +616,13 @@ armv6pmu_disable_event(struct hw_perf_event *hwc, raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, - int idx) +static void armv6mpcore_pmu_disable_event(struct perf_event *event) { unsigned long val, mask, flags, evt = 0; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; if (ARMV6_CYCLE_COUNTER == idx) { mask = ARMV6_PMCR_CCOUNT_IEN; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index b189403f30e..1183c81087b 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -840,8 +840,10 @@ static inline int armv7_pmnc_select_counter(int idx) return idx; } -static inline u32 armv7pmu_read_counter(int idx) +static inline u32 armv7pmu_read_counter(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; u32 value = 0; if (!armv7_pmnc_counter_valid(idx)) @@ -855,8 +857,11 @@ static inline u32 armv7pmu_read_counter(int idx) return value; } -static inline void armv7pmu_write_counter(int idx, u32 value) +static inline void armv7pmu_write_counter(struct perf_event *event, u32 value) { + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + if (!armv7_pmnc_counter_valid(idx)) pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); @@ -991,10 +996,13 @@ static void armv7_pmnc_dump_regs(void) } #endif -static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void armv7pmu_enable_event(struct perf_event *event) { unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; /* * Enable counter and interrupt, and set the counter to count @@ -1028,10 +1036,13 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) +static void armv7pmu_disable_event(struct perf_event *event) { unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; /* * Disable counter and interrupt @@ -1055,7 +1066,8 @@ static irqreturn_t 
armv7pmu_handle_irq(int irq_num, void *dev) { u32 pmnc; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -1075,7 +1087,6 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) */ regs = get_irq_regs(); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -1092,13 +1103,13 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } /* @@ -1113,7 +1124,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void armv7pmu_start(void) +static void armv7pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -1124,7 +1135,7 @@ static void armv7pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void armv7pmu_stop(void) +static void armv7pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -1136,10 +1147,12 @@ static void armv7pmu_stop(void) } static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { int idx; - unsigned long evtype = event->config_base & ARMV7_EVTYPE_EVENT; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT; /* Always place a cycle counter into the cycle counter. */ if (evtype == ARMV7_PERFCTR_CPU_CYCLES) { @@ -1190,11 +1203,14 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event, static void armv7pmu_reset(void *info) { + struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; u32 idx, nb_cnt = cpu_pmu->num_events; /* The counter and interrupt enable registers are unknown at reset. 
*/ - for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) - armv7pmu_disable_event(NULL, idx); + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_disable_counter(idx); + armv7_pmnc_disable_intens(idx); + } /* Initialize & Reset PMNC: C and P bits */ armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 131ede6c2fd..0c8265e53d5 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -224,7 +224,8 @@ xscale1pmu_handle_irq(int irq_num, void *dev) { unsigned long pmnc; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -248,7 +249,6 @@ xscale1pmu_handle_irq(int irq_num, void *dev) regs = get_irq_regs(); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -260,13 +260,13 @@ xscale1pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } irq_work_run(); @@ -280,11 +280,13 @@ xscale1pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void -xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void xscale1pmu_enable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; switch (idx) { case XSCALE_CYCLE_COUNTER: @@ -314,11 +316,13 @@ xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) +static void xscale1pmu_disable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; switch (idx) { case XSCALE_CYCLE_COUNTER: @@ -348,9 +352,10 @@ xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) static int xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { - if (XSCALE_PERFCTR_CCNT == event->config_base) { + struct hw_perf_event *hwc = &event->hw; + if (XSCALE_PERFCTR_CCNT == hwc->config_base) { if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) return -EAGAIN; @@ -366,8 +371,7 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, } } -static void -xscale1pmu_start(void) +static void xscale1pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -379,8 +383,7 @@ xscale1pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale1pmu_stop(void) +static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -392,9 +395,10 @@ xscale1pmu_stop(void) 
raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 -xscale1pmu_read_counter(int counter) +static inline u32 xscale1pmu_read_counter(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; u32 val = 0; switch (counter) { @@ -412,9 +416,11 @@ xscale1pmu_read_counter(int counter) return val; } -static inline void -xscale1pmu_write_counter(int counter, u32 val) +static inline void xscale1pmu_write_counter(struct perf_event *event, u32 val) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + switch (counter) { case XSCALE_CYCLE_COUNTER: asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); @@ -565,7 +571,8 @@ xscale2pmu_handle_irq(int irq_num, void *dev) { unsigned long pmnc, of_flags; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); struct pt_regs *regs; int idx; @@ -583,7 +590,6 @@ xscale2pmu_handle_irq(int irq_num, void *dev) regs = get_irq_regs(); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -595,13 +601,13 @@ xscale2pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } irq_work_run(); @@ -615,11 +621,13 @@ xscale2pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void -xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void xscale2pmu_enable_event(struct perf_event *event) { unsigned long flags, ien, evtsel; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; ien = xscale2pmu_read_int_enable(); evtsel = xscale2pmu_read_event_select(); @@ -659,11 +667,13 @@ xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) +static void xscale2pmu_disable_event(struct perf_event *event) { unsigned long flags, ien, evtsel, of_flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; ien = xscale2pmu_read_int_enable(); evtsel = xscale2pmu_read_event_select(); @@ -711,7 +721,7 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) static int xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { int idx = xscale1pmu_get_event_idx(cpuc, event); if (idx >= 0) @@ -725,8 +735,7 @@ out: return idx; } -static void -xscale2pmu_start(void) +static void xscale2pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -738,8 +747,7 @@ xscale2pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void -xscale2pmu_stop(void) +static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags, val; struct pmu_hw_events *events = cpu_pmu->get_hw_events(); @@ -751,9 +759,10 @@ xscale2pmu_stop(void) 
raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 -xscale2pmu_read_counter(int counter) +static inline u32 xscale2pmu_read_counter(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; u32 val = 0; switch (counter) { @@ -777,9 +786,11 @@ xscale2pmu_read_counter(int counter) return val; } -static inline void -xscale2pmu_write_counter(int counter, u32 val) +static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val) { + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + switch (counter) { case XSCALE_CYCLE_COUNTER: asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); -- cgit v1.2.3 From 7279adbd9bb8ef8ff669da50f0e84c65a14022b5 Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Fri, 20 Jul 2012 15:18:07 +0100 Subject: ARM: perf: check ARMv7 counter validity on a per-pmu basis Multi-cluster ARMv7 systems may have CPU PMUs with different number of counters. This patch updates armv7_pmnc_counter_valid so that it takes a pmu argument and checks the counter validity against that. We also remove a number of redundant counter checks whether the current PMU is not easily retrievable. Signed-off-by: Sudeep KarkadaNagesha Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v7.c | 94 +++++++++++++---------------------------- 1 file changed, 30 insertions(+), 64 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 1183c81087b..7d0cce85d17 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -736,7 +736,8 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] */ #define ARMV7_IDX_CYCLE_COUNTER 0 #define ARMV7_IDX_COUNTER0 1 -#define ARMV7_IDX_COUNTER_LAST (ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) +#define ARMV7_IDX_COUNTER_LAST(cpu_pmu) \ + (ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) #define ARMV7_MAX_COUNTERS 32 #define ARMV7_COUNTER_MASK (ARMV7_MAX_COUNTERS - 1) @@ -802,38 +803,20 @@ static inline int armv7_pmnc_has_overflowed(u32 pmnc) return pmnc & ARMV7_OVERFLOWED_MASK; } -static inline int armv7_pmnc_counter_valid(int idx) +static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx) { - return idx >= ARMV7_IDX_CYCLE_COUNTER && idx <= ARMV7_IDX_COUNTER_LAST; + return idx >= ARMV7_IDX_CYCLE_COUNTER && + idx <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); } static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx) { - int ret = 0; - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), idx); - } else { - counter = ARMV7_IDX_TO_COUNTER(idx); - ret = pmnc & BIT(counter); - } - - return ret; + return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx)); } static inline int armv7_pmnc_select_counter(int idx) { - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u selecting wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV7_IDX_TO_COUNTER(idx); + u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); isb(); @@ -842,11 +825,12 @@ static inline int armv7_pmnc_select_counter(int idx) static inline u32 armv7pmu_read_counter(struct perf_event *event) { + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; u32 value = 0; - if (!armv7_pmnc_counter_valid(idx)) + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) 
pr_err("CPU%u reading wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV7_IDX_CYCLE_COUNTER) @@ -859,10 +843,11 @@ static inline u32 armv7pmu_read_counter(struct perf_event *event) static inline void armv7pmu_write_counter(struct perf_event *event, u32 value) { + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - if (!armv7_pmnc_counter_valid(idx)) + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV7_IDX_CYCLE_COUNTER) @@ -881,60 +866,28 @@ static inline void armv7_pmnc_write_evtsel(int idx, u32 val) static inline int armv7_pmnc_enable_counter(int idx) { - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u enabling wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV7_IDX_TO_COUNTER(idx); + u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter))); return idx; } static inline int armv7_pmnc_disable_counter(int idx) { - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u disabling wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV7_IDX_TO_COUNTER(idx); + u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter))); return idx; } static inline int armv7_pmnc_enable_intens(int idx) { - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV7_IDX_TO_COUNTER(idx); + u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter))); return idx; } static inline int armv7_pmnc_disable_intens(int idx) { - u32 counter; - - if (!armv7_pmnc_counter_valid(idx)) { - pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV7_IDX_TO_COUNTER(idx); + u32 counter = ARMV7_IDX_TO_COUNTER(idx); asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter))); isb(); /* Clear the overflow flag in case an interrupt is pending. */ @@ -959,7 +912,7 @@ static inline u32 armv7_pmnc_getreset_flags(void) } #ifdef DEBUG -static void armv7_pmnc_dump_regs(void) +static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) { u32 val; unsigned int cnt; @@ -984,7 +937,8 @@ static void armv7_pmnc_dump_regs(void) asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); printk(KERN_INFO "CCNT =0x%08x\n", val); - for (cnt = ARMV7_IDX_COUNTER0; cnt <= ARMV7_IDX_COUNTER_LAST; cnt++) { + for (cnt = ARMV7_IDX_COUNTER0; + cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { armv7_pmnc_select_counter(cnt); asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); printk(KERN_INFO "CNT[%d] count =0x%08x\n", @@ -1004,6 +958,12 @@ static void armv7pmu_enable_event(struct perf_event *event) struct pmu_hw_events *events = cpu_pmu->get_hw_events(); int idx = hwc->idx; + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return; + } + /* * Enable counter and interrupt, and set the counter to count * the event that we're interested in. 
@@ -1044,6 +1004,12 @@ static void armv7pmu_disable_event(struct perf_event *event) struct pmu_hw_events *events = cpu_pmu->get_hw_events(); int idx = hwc->idx; + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return; + } + /* * Disable counter and interrupt */ -- cgit v1.2.3 From 2a4961ba89ffae388a553175db51dd58ce7c39a8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 21 Sep 2012 11:53:41 +0100 Subject: ARM: perf: register cpu_notifier at driver init The current practice of registering the cpu hotplug notifier at PMU registration time won't be safe with multiple PMUs, as we'll repeatedly attempt to register the notifier. This has the unfortunate effect of silently corrupting the notifier list, leading to boot stalling. Instead, register the notifier at init time. Its sanity checks will prevent anything bad from happening if the notifier is called before we have any PMUs registered. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 02244faa539..71c824ce020 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -285,7 +285,6 @@ static int __devinit cpu_pmu_device_probe(struct platform_device *pdev) cpu_pmu = pmu; cpu_pmu->plat_device = pdev; cpu_pmu_init(cpu_pmu); - register_cpu_notifier(&cpu_pmu_hotplug_notifier); armpmu_register(cpu_pmu, cpu_pmu->name, PERF_TYPE_RAW); return 0; @@ -303,6 +302,16 @@ static struct platform_driver cpu_pmu_driver = { static int __init register_pmu_driver(void) { - return platform_driver_register(&cpu_pmu_driver); + int err; + + err = register_cpu_notifier(&cpu_pmu_hotplug_notifier); + if (err) + return err; + + err = platform_driver_register(&cpu_pmu_driver); + if (err) + unregister_cpu_notifier(&cpu_pmu_hotplug_notifier); + + return err; } device_initcall(register_pmu_driver); -- cgit v1.2.3 From 288700d16d4c1479aa00e3db13caed8bb7c83e9f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 21 Sep 2012 14:14:17 +0100 Subject: ARM: perf: return NOTIFY_DONE from cpu notifier when no available PMU When attempting to reset the PMU state for either a NULL PMU or a PMU implementation without a reset function, return NOTIFY_DONE from the CPU notifier as we don't care about the hotplug event. Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 71c824ce020..db9c6b530f3 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -165,6 +165,8 @@ static int __cpuinit cpu_pmu_notify(struct notifier_block *b, if (cpu_pmu && cpu_pmu->reset) cpu_pmu->reset(cpu_pmu); + else + return NOTIFY_DONE; return NOTIFY_OK; } -- cgit v1.2.3 From 0305230a3d92d6829db89c9e0c096d4d8733f317 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 21 Sep 2012 14:23:47 +0100 Subject: ARM: perf: consistently use arm_pmu->name for PMU name Perf has three ways to name a PMU: either by passing an explicit char *, reading arm_pmu->name or accessing arm_pmu->pmu.name. Just use arm_pmu->name consistently in the ARM backend. 
Signed-off-by: Will Deacon --- arch/arm/include/asm/pmu.h | 2 +- arch/arm/kernel/perf_event.c | 4 ++-- arch/arm/kernel/perf_event_cpu.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index a209a384dbc..f24edad26c7 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -93,7 +93,7 @@ struct arm_pmu { extern const struct dev_pm_ops armpmu_dev_pm_ops; -int armpmu_register(struct arm_pmu *armpmu, char *name, int type); +int armpmu_register(struct arm_pmu *armpmu, int type); u64 armpmu_event_update(struct perf_event *event); diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 1cfa3f35713..1243deda5bb 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -511,12 +511,12 @@ static void __init armpmu_init(struct arm_pmu *armpmu) }; } -int armpmu_register(struct arm_pmu *armpmu, char *name, int type) +int armpmu_register(struct arm_pmu *armpmu, int type) { armpmu_init(armpmu); pr_info("enabled with %s PMU driver, %d counters available\n", armpmu->name, armpmu->num_events); - return perf_pmu_register(&armpmu->pmu, name, type); + return perf_pmu_register(&armpmu->pmu, armpmu->name, type); } /* diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index db9c6b530f3..9a4f6307a01 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -46,7 +46,7 @@ const char *perf_pmu_name(void) if (!cpu_pmu) return NULL; - return cpu_pmu->pmu.name; + return cpu_pmu->name; } EXPORT_SYMBOL_GPL(perf_pmu_name); @@ -287,7 +287,7 @@ static int __devinit cpu_pmu_device_probe(struct platform_device *pdev) cpu_pmu = pmu; cpu_pmu->plat_device = pdev; cpu_pmu_init(cpu_pmu); - armpmu_register(cpu_pmu, cpu_pmu->name, PERF_TYPE_RAW); + armpmu_register(cpu_pmu, PERF_TYPE_RAW); return 0; } -- cgit v1.2.3 From 2ac29a14a8b6b4a37c09c50db88dc893e6e7fc75 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 25 Oct 2012 21:23:18 +0100 Subject: ARM: PMU: fix runtime PM enable Commit 7be2958 (ARM: PMU: Add runtime PM Support) updated the ARM PMU code to use runtime PM which was prototyped and validated on the OMAP devices. In this commit, there is no call pm_runtime_enable() and for OMAP devices pm_runtime_enable() is currently being called from the OMAP PMU code when the PMU device is created. However, there are two problems with this: 1. For any other ARM device wishing to use runtime PM for PMU they will need to call pm_runtime_enable() for runtime PM to work. 2. When booting with device-tree and using device-tree to create the PMU device, pm_runtime_enable() needs to be called from within the ARM PERF driver as we are no longer calling any device specific code to create the device. Hence, PMU does not work on OMAP devices that use the runtime PM callbacks when using device-tree to create the PMU device. Therefore, call pm_runtime_enable() directly from the ARM PMU driver when registering the device. For platforms that do not use runtime PM, pm_runtime_enable() does nothing and for platforms that do use runtime PM but may not require it specifically for PMU, this will just add a little overhead when initialising and uninitialising the PMU device. Tested with PERF on OMAP2420, OMAP3430 and OMAP4460. 
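As an illustration of how the calls pair up once this change is applied (a sketch assuming the reserve/release paths shown earlier in this series, not additional code in this patch):

	pm_runtime_enable(&armpmu->plat_device->dev);	/* armpmu_register() */
	...
	pm_runtime_get_sync(&pmu_device->dev);		/* armpmu_reserve_hardware() */
	...
	pm_runtime_put_sync(&armpmu->plat_device->dev);	/* armpmu_release_hardware() */

Enabling runtime PM at registration time is what lets the existing get/put calls take effect on platforms that rely on runtime PM, without requiring platform code to enable it separately.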
Acked-by: Kevin Hilman Acked-by: Tony Lindgren Signed-off-by: Jon Hunter Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 1 + arch/arm/mach-omap2/pmu.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 1243deda5bb..f9e8657dd24 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -514,6 +514,7 @@ static void __init armpmu_init(struct arm_pmu *armpmu) int armpmu_register(struct arm_pmu *armpmu, int type) { armpmu_init(armpmu); + pm_runtime_enable(&armpmu->plat_device->dev); pr_info("enabled with %s PMU driver, %d counters available\n", armpmu->name, armpmu->num_events); return perf_pmu_register(&armpmu->pmu, armpmu->name, type); diff --git a/arch/arm/mach-omap2/pmu.c b/arch/arm/mach-omap2/pmu.c index 2a791766283..031e2fbd0e1 100644 --- a/arch/arm/mach-omap2/pmu.c +++ b/arch/arm/mach-omap2/pmu.c @@ -57,8 +57,6 @@ static int __init omap2_init_pmu(unsigned oh_num, char *oh_names[]) if (IS_ERR(omap_pmu_dev)) return PTR_ERR(omap_pmu_dev); - pm_runtime_enable(&omap_pmu_dev->dev); - return 0; } -- cgit v1.2.3 From e64877dcf5fd05d81fa195785a738f3a729587a3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 21 Sep 2012 14:53:13 +0100 Subject: ARM: hw_breakpoint: only clear OS lock when implemented on v7 The OS save and restore register are optional in debug architecture v7, so check the status register before attempting to clear the OS lock. Tested-by: Stephen Boyd Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 281bf330124..76a650a1a1d 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -906,7 +906,7 @@ static struct undef_hook debug_reg_hook = { static void reset_ctrl_regs(void *unused) { int i, raw_num_brps, err = 0, cpu = smp_processor_id(); - u32 dbg_power; + u32 val; /* * v7 debug contains save and restore registers so that debug state @@ -926,16 +926,23 @@ static void reset_ctrl_regs(void *unused) * Ensure sticky power-down is clear (i.e. debug logic is * powered up). */ - asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power)); - if ((dbg_power & 0x1) == 0) + asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (val)); + if ((val & 0x1) == 0) err = -EPERM; + + /* + * Check whether we implement OS save and restore. + */ + asm volatile("mrc p14, 0, %0, c1, c1, 4" : "=r" (val)); + if ((val & 0x9) == 0) + goto clear_vcr; break; case ARM_DEBUG_ARCH_V7_1: /* * Ensure the OS double lock is clear. */ - asm volatile("mrc p14, 0, %0, c1, c3, 4" : "=r" (dbg_power)); - if ((dbg_power & 0x1) == 1) + asm volatile("mrc p14, 0, %0, c1, c3, 4" : "=r" (val)); + if ((val & 0x1) == 1) err = -EPERM; break; } @@ -947,7 +954,7 @@ static void reset_ctrl_regs(void *unused) } /* - * Unconditionally clear the lock by writing a value + * Unconditionally clear the OS lock by writing a value * other than 0xC5ACCE55 to the access register. */ asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0)); @@ -957,6 +964,7 @@ static void reset_ctrl_regs(void *unused) * Clear any configured vector-catch events before * enabling monitor mode. 
*/ +clear_vcr: asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0)); isb(); -- cgit v1.2.3 From b59a540ca927ea84bb0590b9d8076f50c969abb4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 21 Sep 2012 15:08:17 +0100 Subject: ARM: hw_breakpoint: fix monitor mode detection with v7.1 Detecting whether halting debug is enabled is no longer possible via the DBGDSCR in v7.1, returning an UNKNOWN value for the HDBGen bit via CP14 when the OS lock is clear. This patch removes the halting mode check and ensures that accesses to the internal and external views of the DBGDSCR are serialised with an instruction barrier. Tested-by: Stephen Boyd Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 76a650a1a1d..c87ea68d64a 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -235,13 +235,6 @@ static int enable_monitor_mode(void) ARM_DBG_READ(c1, 0, dscr); - /* Ensure that halting mode is disabled. */ - if (WARN_ONCE(dscr & ARM_DSCR_HDBGEN, - "halting debug mode enabled. Unable to access hardware resources.\n")) { - ret = -EPERM; - goto out; - } - /* If monitor mode is already enabled, just return. */ if (dscr & ARM_DSCR_MDBGEN) goto out; @@ -255,6 +248,7 @@ static int enable_monitor_mode(void) case ARM_DEBUG_ARCH_V7_ECP14: case ARM_DEBUG_ARCH_V7_1: ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN)); + isb(); break; default: ret = -ENODEV; @@ -1000,8 +994,6 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = { static int __init arch_hw_breakpoint_init(void) { - u32 dscr; - debug_arch = get_debug_arch(); if (!debug_arch_supported()) { @@ -1036,17 +1028,10 @@ static int __init arch_hw_breakpoint_init(void) core_num_brps, core_has_mismatch_brps() ? "(+1 reserved) " : "", core_num_wrps); - ARM_DBG_READ(c1, 0, dscr); - if (dscr & ARM_DSCR_HDBGEN) { - max_watchpoint_len = 4; - pr_warning("halting debug mode enabled. Assuming maximum watchpoint size of %u bytes.\n", - max_watchpoint_len); - } else { - /* Work out the maximum supported watchpoint length. */ - max_watchpoint_len = get_max_wp_len(); - pr_info("maximum watchpoint size is %u bytes.\n", - max_watchpoint_len); - } + /* Work out the maximum supported watchpoint length. */ + max_watchpoint_len = get_max_wp_len(); + pr_info("maximum watchpoint size is %u bytes.\n", + max_watchpoint_len); /* Register debug fault handler. */ hook_fault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP, -- cgit v1.2.3 From 614bea500a88be2a841af0967469961470f2be83 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 21 Sep 2012 15:38:26 +0100 Subject: ARM: hw_breakpoint: fix ordering of debug register reset sequence The debug register reset sequence for v7 and v7.1 is congruent with tap-dancing through a minefield. Rather than wait until we've blown ourselves to pieces, this patch instead checks the debug_err_mask after each potentially faulting operation. We also move the enabling of monitor_mode to the end of the sequence in order to prevent spurious debug events generated by UNKNOWN register values. 
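For illustration, the check repeated after each potentially faulting step takes roughly the following form (a sketch based on the hunks below; the warning text varies per step, this is the vector-catch instance):

	if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) {
		pr_warning("CPU %d failed to disable vector catch\n", cpu);
		return;
	}

A fault in any step therefore marks the CPU in debug_err_mask and aborts the rest of the reset sequence, and the final enable_monitor_mode() attempt likewise only records failure in the mask rather than generating spurious debug events.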
Reported-by: Stephen Boyd Tested-by: Stephen Boyd Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index c87ea68d64a..ae6bf80e3a5 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -231,8 +231,6 @@ static int get_num_brps(void) static int enable_monitor_mode(void) { u32 dscr; - int ret = 0; - ARM_DBG_READ(c1, 0, dscr); /* If monitor mode is already enabled, just return. */ @@ -251,17 +249,18 @@ static int enable_monitor_mode(void) isb(); break; default: - ret = -ENODEV; - goto out; + return -ENODEV; } /* Check that the write made it through. */ ARM_DBG_READ(c1, 0, dscr); - if (!(dscr & ARM_DSCR_MDBGEN)) - ret = -EPERM; + if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN), + "Failed to enable monitor mode on CPU %d.\n", + smp_processor_id())) + return -EPERM; out: - return ret; + return 0; } int hw_breakpoint_slots(int type) @@ -962,11 +961,16 @@ clear_vcr: asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0)); isb(); -reset_regs: - if (enable_monitor_mode()) + if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { + pr_warning("CPU %d failed to disable vector catch\n", cpu); return; + } - /* We must also reset any reserved registers. */ +reset_regs: + /* + * The control/value register pairs are UNKNOWN out of reset so + * clear them to avoid spurious debug events. + */ raw_num_brps = get_num_brp_resources(); for (i = 0; i < raw_num_brps; ++i) { write_wb_reg(ARM_BASE_BCR + i, 0UL); @@ -977,6 +981,18 @@ reset_regs: write_wb_reg(ARM_BASE_WCR + i, 0UL); write_wb_reg(ARM_BASE_WVR + i, 0UL); } + + if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { + pr_warning("CPU %d failed to clear debug register pairs\n", cpu); + return; + } + + /* + * Have a crack at enabling monitor mode. We don't actually need + * it yet, but reporting an error early is useful if it fails. + */ + if (enable_monitor_mode()) + cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu)); } static int __cpuinit dbg_reset_notify(struct notifier_block *self, -- cgit v1.2.3 From 7f4050a07be8ce5fad069722326ccd550577a93a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 21 Sep 2012 17:53:08 +0100 Subject: ARM: hw_breakpoint: don't try to clear v6 debug registers during boot v6 cores do not provide a way to clear the debug registers without first enabling monitor mode, meaning that we could take spurious debug exceptions. Instead, rely on the registers being in a sane state when we boot as they are defined to be disabled out of reset anyway. Tested-by: Stephen Boyd Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index ae6bf80e3a5..8cd52faeb77 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -912,8 +912,8 @@ static void reset_ctrl_regs(void *unused) switch (debug_arch) { case ARM_DEBUG_ARCH_V6: case ARM_DEBUG_ARCH_V6_1: - /* ARMv6 cores just need to reset the registers. */ - goto reset_regs; + /* ARMv6 cores clear the registers out of reset. */ + goto out_mdbgen; case ARM_DEBUG_ARCH_V7_ECP14: /* * Ensure sticky power-down is clear (i.e. 
debug logic is @@ -966,7 +966,6 @@ clear_vcr: return; } -reset_regs: /* * The control/value register pairs are UNKNOWN out of reset so * clear them to avoid spurious debug events. @@ -991,6 +990,7 @@ reset_regs: * Have a crack at enabling monitor mode. We don't actually need * it yet, but reporting an error early is useful if it fails. */ +out_mdbgen: if (enable_monitor_mode()) cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu)); } -- cgit v1.2.3 From 5ad29ea24e58777aa1daaa2255670ffb40aefd99 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 21 Sep 2012 18:17:24 +0100 Subject: ARM: hw_breakpoint: make boot quieter without CPUID feature registers Booting on a v6 core without the CPUID feature registers (e.g. 1136) leads to a noisy dmesg complaining about their absence. This patch changes the pr_warning into a pr_warn_once to keep the log quieter. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 8cd52faeb77..983558eb902 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -136,8 +136,8 @@ static u8 get_debug_arch(void) /* Do we implement the extended CPUID interface? */ if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { - pr_warning("CPUID feature registers not supported. " - "Assuming v6 debug is present.\n"); + pr_warn_once("CPUID feature registers not supported. " + "Assuming v6 debug is present.\n"); return ARM_DEBUG_ARCH_V6; } -- cgit v1.2.3 From 0daa034e696ac601061cbf60fda41ad39678ae14 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 24 Sep 2012 18:01:13 +0100 Subject: ARM: hw_breakpoint: check if monitor mode is enabled during validation Rather than attempt to enable monitor mode explicitly when scheduling in a breakpoint event (which could raise an undefined exception trap when accessing DBGDSCRext), instead check that DBGDSCRint.MDBGen is set during event validation and report an error to the caller if not. Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 983558eb902..8e9532152bd 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -228,6 +228,13 @@ static int get_num_brps(void) * be put into halting debug mode at any time by an external debugger * but there is nothing we can do to prevent that. */ +static int monitor_mode_enabled(void) +{ + u32 dscr; + ARM_DBG_READ(c1, 0, dscr); + return !!(dscr & ARM_DSCR_MDBGEN); +} + static int enable_monitor_mode(void) { u32 dscr; @@ -321,14 +328,9 @@ int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); struct perf_event **slot, **slots; - int i, max_slots, ctrl_base, val_base, ret = 0; + int i, max_slots, ctrl_base, val_base; u32 addr, ctrl; - /* Ensure that we are in monitor mode and halting mode is disabled. 
*/ - ret = enable_monitor_mode(); - if (ret) - goto out; - addr = info->address; ctrl = encode_ctrl_reg(info->ctrl) | 0x1; @@ -355,10 +357,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp) } } - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n")) { - ret = -EBUSY; - goto out; - } + if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n")) + return -EBUSY; /* Override the breakpoint data with the step data. */ if (info->step_ctrl.enabled) { @@ -376,9 +376,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) /* Setup the control register. */ write_wb_reg(ctrl_base + i, ctrl); - -out: - return ret; + return 0; } void arch_uninstall_hw_breakpoint(struct perf_event *bp) @@ -589,6 +587,10 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) int ret = 0; u32 offset, alignment_mask = 0x3; + /* Ensure that we are in monitor debug mode. */ + if (!monitor_mode_enabled()) + return -ENODEV; + /* Build the arch_hw_breakpoint. */ ret = arch_build_bp_info(bp); if (ret) -- cgit v1.2.3 From 9e962f76602dbd293a57030f4ce5a4b57853e2ea Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Wed, 26 Sep 2012 17:28:47 +0100 Subject: ARM: hw_breakpoint: use CRn as argument for debug reg accessor macros The coprocessor register CRn for accesses to the debug register can be a different one than C0. Take this into account for the ARM_DBG_READ and the ARM_DBG_WRITE macro. The inline assembler calls which used a coprocessor register CRn other than C0 are replaced by the ARM_DBG_READ or ARM_DBG_WRITE macro. Tested-by: Stephen Boyd Signed-off-by: Dietmar Eggemann Signed-off-by: Will Deacon --- arch/arm/include/asm/hw_breakpoint.h | 8 ++++---- arch/arm/kernel/hw_breakpoint.c | 40 ++++++++++++++++++------------------ 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index c190bc992f0..01169dd723f 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -98,12 +98,12 @@ static inline void decode_ctrl_reg(u32 reg, #define ARM_BASE_WCR 112 /* Accessor macros for the debug registers. */ -#define ARM_DBG_READ(M, OP2, VAL) do {\ - asm volatile("mrc p14, 0, %0, c0," #M ", " #OP2 : "=r" (VAL));\ +#define ARM_DBG_READ(N, M, OP2, VAL) do {\ + asm volatile("mrc p14, 0, %0, " #N "," #M ", " #OP2 : "=r" (VAL));\ } while (0) -#define ARM_DBG_WRITE(M, OP2, VAL) do {\ - asm volatile("mcr p14, 0, %0, c0," #M ", " #OP2 : : "r" (VAL));\ +#define ARM_DBG_WRITE(N, M, OP2, VAL) do {\ + asm volatile("mcr p14, 0, %0, " #N "," #M ", " #OP2 : : "r" (VAL));\ } while (0) struct notifier_block; diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 8e9532152bd..05febbaecb4 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -52,14 +52,14 @@ static u8 debug_arch; /* Maximum supported watchpoint length. 
*/ static u8 max_watchpoint_len; -#define READ_WB_REG_CASE(OP2, M, VAL) \ - case ((OP2 << 4) + M): \ - ARM_DBG_READ(c ## M, OP2, VAL); \ +#define READ_WB_REG_CASE(OP2, M, VAL) \ + case ((OP2 << 4) + M): \ + ARM_DBG_READ(c0, c ## M, OP2, VAL); \ break -#define WRITE_WB_REG_CASE(OP2, M, VAL) \ - case ((OP2 << 4) + M): \ - ARM_DBG_WRITE(c ## M, OP2, VAL);\ +#define WRITE_WB_REG_CASE(OP2, M, VAL) \ + case ((OP2 << 4) + M): \ + ARM_DBG_WRITE(c0, c ## M, OP2, VAL); \ break #define GEN_READ_WB_REG_CASES(OP2, VAL) \ @@ -141,7 +141,7 @@ static u8 get_debug_arch(void) return ARM_DEBUG_ARCH_V6; } - ARM_DBG_READ(c0, 0, didr); + ARM_DBG_READ(c0, c0, 0, didr); return (didr >> 16) & 0xf; } @@ -169,7 +169,7 @@ static int debug_exception_updates_fsr(void) static int get_num_wrp_resources(void) { u32 didr; - ARM_DBG_READ(c0, 0, didr); + ARM_DBG_READ(c0, c0, 0, didr); return ((didr >> 28) & 0xf) + 1; } @@ -177,7 +177,7 @@ static int get_num_wrp_resources(void) static int get_num_brp_resources(void) { u32 didr; - ARM_DBG_READ(c0, 0, didr); + ARM_DBG_READ(c0, c0, 0, didr); return ((didr >> 24) & 0xf) + 1; } @@ -231,14 +231,14 @@ static int get_num_brps(void) static int monitor_mode_enabled(void) { u32 dscr; - ARM_DBG_READ(c1, 0, dscr); + ARM_DBG_READ(c0, c1, 0, dscr); return !!(dscr & ARM_DSCR_MDBGEN); } static int enable_monitor_mode(void) { u32 dscr; - ARM_DBG_READ(c1, 0, dscr); + ARM_DBG_READ(c0, c1, 0, dscr); /* If monitor mode is already enabled, just return. */ if (dscr & ARM_DSCR_MDBGEN) @@ -248,11 +248,11 @@ static int enable_monitor_mode(void) switch (get_debug_arch()) { case ARM_DEBUG_ARCH_V6: case ARM_DEBUG_ARCH_V6_1: - ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN)); + ARM_DBG_WRITE(c0, c1, 0, (dscr | ARM_DSCR_MDBGEN)); break; case ARM_DEBUG_ARCH_V7_ECP14: case ARM_DEBUG_ARCH_V7_1: - ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN)); + ARM_DBG_WRITE(c0, c2, 2, (dscr | ARM_DSCR_MDBGEN)); isb(); break; default: @@ -260,7 +260,7 @@ static int enable_monitor_mode(void) } /* Check that the write made it through. */ - ARM_DBG_READ(c1, 0, dscr); + ARM_DBG_READ(c0, c1, 0, dscr); if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN), "Failed to enable monitor mode on CPU %d.\n", smp_processor_id())) @@ -853,7 +853,7 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, local_irq_enable(); /* We only handle watchpoints and hardware breakpoints. */ - ARM_DBG_READ(c1, 0, dscr); + ARM_DBG_READ(c0, c1, 0, dscr); /* Perform perf callbacks. */ switch (ARM_DSCR_MOE(dscr)) { @@ -921,14 +921,14 @@ static void reset_ctrl_regs(void *unused) * Ensure sticky power-down is clear (i.e. debug logic is * powered up). */ - asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (val)); + ARM_DBG_READ(c1, c5, 4, val); if ((val & 0x1) == 0) err = -EPERM; /* * Check whether we implement OS save and restore. */ - asm volatile("mrc p14, 0, %0, c1, c1, 4" : "=r" (val)); + ARM_DBG_READ(c1, c1, 4, val); if ((val & 0x9) == 0) goto clear_vcr; break; @@ -936,7 +936,7 @@ static void reset_ctrl_regs(void *unused) /* * Ensure the OS double lock is clear. */ - asm volatile("mrc p14, 0, %0, c1, c3, 4" : "=r" (val)); + ARM_DBG_READ(c1, c3, 4, val); if ((val & 0x1) == 1) err = -EPERM; break; @@ -952,7 +952,7 @@ static void reset_ctrl_regs(void *unused) * Unconditionally clear the OS lock by writing a value * other than 0xC5ACCE55 to the access register. */ - asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0)); + ARM_DBG_WRITE(c1, c0, 4, 0); isb(); /* @@ -960,7 +960,7 @@ static void reset_ctrl_regs(void *unused) * enabling monitor mode. 
*/ clear_vcr: - asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0)); + ARM_DBG_WRITE(c0, c7, 0, 0); isb(); if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { -- cgit v1.2.3 From f435ab79928e4d54082e2838c4562a165e37999c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 25 Oct 2012 17:18:23 +0100 Subject: ARM: hw_breakpoint: kill WARN_ONCE usage WARN_ONCE is a bit OTT for some of the simple failure cases encountered in hw_breakpoint, so use either pr_warning or pr_warn_once instead. Reported-by: Stephen Boyd Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 05febbaecb4..5ff2e77782b 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -261,10 +261,11 @@ static int enable_monitor_mode(void) /* Check that the write made it through. */ ARM_DBG_READ(c0, c1, 0, dscr); - if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN), - "Failed to enable monitor mode on CPU %d.\n", - smp_processor_id())) + if (!(dscr & ARM_DSCR_MDBGEN)) { + pr_warn_once("Failed to enable monitor mode on CPU %d.\n", + smp_processor_id()); return -EPERM; + } out: return 0; @@ -357,8 +358,10 @@ int arch_install_hw_breakpoint(struct perf_event *bp) } } - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n")) + if (i == max_slots) { + pr_warning("Can't find any breakpoint slot\n"); return -EBUSY; + } /* Override the breakpoint data with the step data. */ if (info->step_ctrl.enabled) { @@ -407,8 +410,10 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) } } - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n")) + if (i == max_slots) { + pr_warning("Can't find any breakpoint slot\n"); return; + } /* Ensure that we disable the mismatch breakpoint. */ if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE && -- cgit v1.2.3 From 864aa04cd02979c2c755cb28b5f4fe56039171c0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Sep 2012 19:18:35 +0100 Subject: ARM: mm: use pteval_t to represent page protection values When updating the page protection map after calculating the user_pgprot value, the base protection map is temporarily stored in an unsigned long type, causing truncation of the protection bits when LPAE is enabled. This effectively means that calls to mprotect() will corrupt the upper page attributes, clearing the XN bit unconditionally. This patch uses pteval_t to store the intermediate protection values, preserving the upper bits for 64-bit descriptors. 
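To see why the truncation matters, here is a sketch for illustration only (not part of the patch): with LPAE enabled, pteval_t is a 64-bit type and L_PTE_XN sits at bit 54, so funnelling the intermediate value through an unsigned long silently drops everything above bit 31.

	pteval_t v = pgprot_val(protection_map[i]);		/* 64-bit: bit 54 (XN) preserved */
	unsigned long bad = pgprot_val(protection_map[i]);	/* 32-bit: bits 32..63 lost */
	protection_map[i] = __pgprot(v | user_pgprot);		/* XN now survives mprotect() updates */
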
Cc: stable@vger.kernel.org Acked-by: Nicolas Pitre Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 941dfb9e9a7..99b47b950ef 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -488,7 +488,7 @@ static void __init build_mem_type_table(void) #endif for (i = 0; i < 16; i++) { - unsigned long v = pgprot_val(protection_map[i]); + pteval_t v = pgprot_val(protection_map[i]); protection_map[i] = __pgprot(v | user_pgprot); } -- cgit v1.2.3 From 0cbbbad63179652272cc5e18a68d69bfc8dd25ce Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 31 Aug 2012 00:57:03 +0100 Subject: ARM: mm: don't use the access flag permissions mechanism for classic MMU The simplified access permissions model is not used for the classic MMU translation regime, so ensure that it is turned off in the sctlr prior to turning on address translation for ARMv7. Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm/mm/proc-v7-2level.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index fd045e70639..e37600b91b2 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -161,11 +161,11 @@ ENDPROC(cpu_v7_set_pte_ext) * TFR EV X F I D LR S * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 1 0 110 0011 1100 .111 1101 < we want + * 01 0 110 0011 1100 .111 1101 < we want */ .align 2 .type v7_crval, #object v7_crval: - crval clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c + crval clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c .previous -- cgit v1.2.3 From dbf62d50067e55a782583fe53c3d2a3d98b1f6f3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 19 Jul 2012 11:51:05 +0100 Subject: ARM: mm: introduce L_PTE_VALID for page table entries For long-descriptor translation table formats, the ARMv7 architecture defines the last two bits of the second- and third-level descriptors to be: x0b - Invalid 01b - Block (second-level), Reserved (third-level) 11b - Table (second-level), Page (third-level) This allows us to define L_PTE_PRESENT as (3 << 0) and use this value to create ptes directly. However, when determining whether a given pte value is present in the low-level page table accessors, we only need to check the least significant bit of the descriptor, allowing us to write faulting, present entries which are required for PROT_NONE mappings. This patch introduces L_PTE_VALID, which can be used to test whether a pte should fault, and updates the low-level page table accessors accordingly. Signed-off-by: Will Deacon --- arch/arm/include/asm/pgtable-2level.h | 1 + arch/arm/include/asm/pgtable-3level.h | 3 ++- arch/arm/include/asm/pgtable.h | 4 +--- arch/arm/mm/proc-v7-2level.S | 2 +- arch/arm/mm/proc-v7-3level.S | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 2317a71c8f8..c44a1ecfc28 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -115,6 +115,7 @@ * The PTE table pointer refers to the hardware entries; the "Linux" * entries are stored 1024 bytes below. 
*/ +#define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */ #define L_PTE_PRESENT (_AT(pteval_t, 1) << 0) #define L_PTE_YOUNG (_AT(pteval_t, 1) << 1) #define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index b24903549d1..e32311a9abc 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -67,7 +67,8 @@ * These bits overlap with the hardware bits but the naming is preserved for * consistency with the classic page table format. */ -#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Valid */ +#define L_PTE_VALID (_AT(pteval_t, 1) << 0) /* Valid */ +#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */ #define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ #define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 08c12312a1f..ccf34b6e990 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -203,9 +203,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) #define pte_special(pte) (0) -#define pte_present_user(pte) \ - ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ - (L_PTE_PRESENT | L_PTE_USER)) +#define pte_present_user(pte) (pte_present(pte) && (pte_val(pte) & L_PTE_USER)) #if __LINUX_ARM_ARCH__ < 6 static inline void __sync_icache_dcache(pte_t pteval) diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index e37600b91b2..e755e9f8d1b 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -100,7 +100,7 @@ ENTRY(cpu_v7_set_pte_ext) orrne r3, r3, #PTE_EXT_XN tst r1, #L_PTE_YOUNG - tstne r1, #L_PTE_PRESENT + tstne r1, #L_PTE_VALID moveq r3, #0 ARM( str r3, [r0, #2048]! ) diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 8de0f1dd154..d23d067e190 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -65,7 +65,7 @@ ENDPROC(cpu_v7_switch_mm) */ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU - tst r2, #L_PTE_PRESENT + tst r2, #L_PTE_VALID beq 1f tst r3, #1 << (55 - 32) @ L_PTE_DIRTY orreq r2, #L_PTE_RDONLY -- cgit v1.2.3 From 26ffd0d43b186b0d5186354da8714a1c2d360df0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 1 Sep 2012 05:22:12 +0100 Subject: ARM: mm: introduce present, faulting entries for PAGE_NONE PROT_NONE mappings apply the page protection attributes defined by _P000 which translate to PAGE_NONE for ARM. These attributes specify an XN, RDONLY pte that is inaccessible to userspace. However, on kernels configured without support for domains, such a pte *is* accessible to the kernel and can be read via get_user, allowing tasks to read PROT_NONE pages via syscalls such as read/write over a pipe. This patch introduces a new software pte flag, L_PTE_NONE, that is set to identify faulting, present entries. 
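As an illustration of the hole being closed, a hypothetical userspace reproducer (not taken from the patch) based on the scenario described above: on a kernel built without domain support, the kernel-side access performed by write() used to succeed on a PROT_NONE mapping; with L_PTE_NONE marking the pte as present but faulting, the copy fails with -EFAULT as on other architectures.

#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	int fds[2];
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	memset(p, 0x55, 4096);
	mprotect(p, 4096, PROT_NONE);	/* userspace can no longer touch p */
	pipe(fds);

	/* Previously the kernel could still read p via get_user(); now the
	 * write is expected to fail with -EFAULT. */
	return write(fds[1], p, 4096) == 4096 ? 1 : 0;
}
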
Signed-off-by: Will Deacon --- arch/arm/include/asm/pgtable-2level.h | 1 + arch/arm/include/asm/pgtable-3level.h | 1 + arch/arm/include/asm/pgtable.h | 6 +++--- arch/arm/mm/proc-macros.S | 4 ++++ arch/arm/mm/proc-v7-2level.S | 4 ++++ arch/arm/mm/proc-v7-3level.S | 3 +++ 6 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index c44a1ecfc28..f97ee02386e 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -124,6 +124,7 @@ #define L_PTE_USER (_AT(pteval_t, 1) << 8) #define L_PTE_XN (_AT(pteval_t, 1) << 9) #define L_PTE_SHARED (_AT(pteval_t, 1) << 10) /* shared(v6), coherent(xsc3) */ +#define L_PTE_NONE (_AT(pteval_t, 1) << 11) /* * These are the memory types, defined to be compatible with diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index e32311a9abc..a3f37929940 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -77,6 +77,7 @@ #define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */ #define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) /* unused */ #define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */ +#define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */ /* * To be used in assembly code with the upper page attributes. diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index ccf34b6e990..9c82f988c0e 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -73,7 +73,7 @@ extern pgprot_t pgprot_kernel; #define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b)) -#define PAGE_NONE _MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY) +#define PAGE_NONE _MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY | L_PTE_NONE) #define PAGE_SHARED _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_XN) #define PAGE_SHARED_EXEC _MOD_PROT(pgprot_user, L_PTE_USER) #define PAGE_COPY _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) @@ -83,7 +83,7 @@ extern pgprot_t pgprot_kernel; #define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN) #define PAGE_KERNEL_EXEC pgprot_kernel -#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN) +#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE) #define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) #define __PAGE_SHARED_EXEC __pgprot(_L_PTE_DEFAULT | L_PTE_USER) #define __PAGE_COPY __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN) @@ -240,7 +240,7 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER; + const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER | L_PTE_NONE; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index b29a2265af0..eb6aa73bc8b 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -167,6 +167,10 @@ tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_PRESENT moveq r3, #0 +#ifndef CONFIG_CPU_USE_DOMAINS + tstne r1, #L_PTE_NONE + movne r3, #0 +#endif str r3, [r0] mcr p15, 0, r0, c7, c10, 1 @ flush_pte diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index e755e9f8d1b..6d98c13ab82 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -101,6 +101,10 @@ ENTRY(cpu_v7_set_pte_ext) tst r1, #L_PTE_YOUNG tstne r1, 
#L_PTE_VALID +#ifndef CONFIG_CPU_USE_DOMAINS + eorne r1, r1, #L_PTE_NONE + tstne r1, #L_PTE_NONE +#endif moveq r3, #0 ARM( str r3, [r0, #2048]! ) diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index d23d067e190..7b56386f949 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -67,6 +67,9 @@ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU tst r2, #L_PTE_VALID beq 1f + tst r3, #1 << (57 - 32) @ L_PTE_NONE + bicne r2, #L_PTE_VALID + bne 1f tst r3, #1 << (55 - 32) @ L_PTE_DIRTY orreq r2, #L_PTE_RDONLY 1: strd r2, r3, [r0] -- cgit v1.2.3 From b62655f4c6f3e4d21934eee14ac2ac5cd479c97c Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Tue, 6 Nov 2012 03:48:40 +0100 Subject: ARM: 7571/1: SMP: add function arch_send_wakeup_ipi_mask() Add function arch_send_wakeup_ipi_mask(), so that platform code can use it as an easy way to wake up cores that are in WFI. Signed-off-by: Shawn Guo Signed-off-by: Russell King --- arch/arm/include/asm/smp.h | 1 + arch/arm/kernel/smp.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 2e3be16c676..d3a22bebe6c 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -79,6 +79,7 @@ extern void cpu_die(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); struct smp_operations { #ifdef CONFIG_SMP diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8e20754dd31..dd5dd0248b8 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -415,6 +415,11 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) smp_cross_call(mask, IPI_CALL_FUNC); } +void arch_send_wakeup_ipi_mask(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_WAKEUP); +} + void arch_send_call_function_single_ipi(int cpu) { smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); -- cgit v1.2.3 From e40678559fdf3f56ce9a349365fbf39e1f63ecc0 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 8 Nov 2012 19:46:07 +0100 Subject: ARM: 7573/1: idmap: use flush_cache_louis() and flush TLBs only when necessary Flushing the cache is needed for the hardware to see the idmap table and therefore can be done at init time. On ARMv7 it is not necessary to flush L2 so flush_cache_louis() is used here instead. There is no point flushing the cache in setup_mm_for_reboot() as the caller should, and already is, taking care of this. If switching the memory map requires a cache flush, then cpu_switch_mm() already includes that operation. What is not done by cpu_switch_mm() on ASID capable CPUs is TLB flushing as the whole point of the ASID is to tag the TLBs and avoid flushing them on a context switch. Since we don't have a clean ASID for the identity mapping, we need to flush the TLB explicitly in that case. Otherwise this is already performed by cpu_switch_mm(). 
Signed-off-by: Nicolas Pitre Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/idmap.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index ab88ed4f8e0..99db769307e 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -92,6 +92,9 @@ static int __init init_static_idmap(void) (long long)idmap_start, (long long)idmap_end); identity_mapping_add(idmap_pgd, idmap_start, idmap_end); + /* Flush L1 for the hardware to see this page table content */ + flush_cache_louis(); + return 0; } early_initcall(init_static_idmap); @@ -103,12 +106,15 @@ early_initcall(init_static_idmap); */ void setup_mm_for_reboot(void) { - /* Clean and invalidate L1. */ - flush_cache_all(); - /* Switch to the identity mapping. */ cpu_switch_mm(idmap_pgd, &init_mm); - /* Flush the TLB. */ +#ifdef CONFIG_CPU_HAS_ASID + /* + * We don't have a clean ASID for the identity mapping, which + * may clash with virtual addresses of the previous page tables + * and therefore potentially in the TLB. + */ local_flush_tlb_all(); +#endif } -- cgit v1.2.3 From 9ecb47de3490b8f2d4b818568935da9ca2c22398 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 8 Nov 2012 19:54:11 +0100 Subject: ARM: 7574/1: kernel/process.c: include idmap.h instead of redeclaring setup_mm_for_reboot() Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/process.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 45fd05186a3..44bc0b327e2 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -56,8 +57,6 @@ static const char *isa_modes[] = { "ARM" , "Thumb" , "Jazelle", "ThumbEE" }; -extern void setup_mm_for_reboot(void); - static volatile int hlt_counter; void disable_hlt(void) -- cgit v1.2.3 From f600b9fcd2bcb8ee0adb235f54ccdd93c729c442 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 15 Nov 2012 21:28:43 +0000 Subject: ARM: cti: fix manipulation of debug lock registers The LOCKSTATUS register for memory-mapped coresight devices indicates whether or not the device in question implements hardware locking. If not, locking is not present (i.e. LSR.SLI == 0) and LAR is write-ignore, so software doesn't actually need to check the status register at all. This patch removes the broken LSR checks. 
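A hypothetical caller (only cti_lock()/cti_unlock() below come from the patch; the surrounding function is illustrative) simply brackets its register programming with the simplified helpers; when LSR.SLI reads as 0 the LOCKACCESS writes are harmlessly ignored by the hardware.

static void cti_program_example(struct cti *cti)
{
	cti_unlock(cti);	/* write LOCKCODE to LAR; write-ignored if locking is absent */
	/* ... program trigger/channel routing through the now-writable registers ... */
	cti_lock(cti);		/* re-lock by writing a value other than LOCKCODE */
}
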
Cc: Ming Lei Reported-by: Mike Williams Signed-off-by: Will Deacon --- arch/arm/include/asm/cti.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/cti.h b/arch/arm/include/asm/cti.h index a0ada3ea435..f2e5cad3f30 100644 --- a/arch/arm/include/asm/cti.h +++ b/arch/arm/include/asm/cti.h @@ -146,15 +146,7 @@ static inline void cti_irq_ack(struct cti *cti) */ static inline void cti_unlock(struct cti *cti) { - void __iomem *base = cti->base; - unsigned long val; - - val = __raw_readl(base + LOCKSTATUS); - - if (val & 1) { - val = LOCKCODE; - __raw_writel(val, base + LOCKACCESS); - } + __raw_writel(LOCKCODE, cti->base + LOCKACCESS); } /** @@ -166,14 +158,6 @@ static inline void cti_unlock(struct cti *cti) */ static inline void cti_lock(struct cti *cti) { - void __iomem *base = cti->base; - unsigned long val; - - val = __raw_readl(base + LOCKSTATUS); - - if (!(val & 1)) { - val = ~LOCKCODE; - __raw_writel(val, base + LOCKACCESS); - } + __raw_writel(~LOCKCODE, cti->base + LOCKACCESS); } #endif -- cgit v1.2.3 From 6920b5a791bbb54810159fbf6acf2c6ae14cdd22 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Sep 2012 10:18:58 +0100 Subject: ARM: move serial_sa1100.h header file to linux/platform_data This is really driver platform data, so move it to the appropriate directory. Acked-by: Greg Kroah-Hartman Signed-off-by: Russell King --- arch/arm/include/asm/mach/serial_sa1100.h | 31 ------------------------------- arch/arm/mach-sa1100/assabet.c | 2 +- arch/arm/mach-sa1100/badge4.c | 2 +- arch/arm/mach-sa1100/cerf.c | 2 +- arch/arm/mach-sa1100/collie.c | 2 +- arch/arm/mach-sa1100/h3xxx.c | 2 +- arch/arm/mach-sa1100/hackkit.c | 2 +- arch/arm/mach-sa1100/jornada720.c | 2 +- arch/arm/mach-sa1100/lart.c | 2 +- arch/arm/mach-sa1100/nanoengine.c | 2 +- arch/arm/mach-sa1100/neponset.c | 2 +- arch/arm/mach-sa1100/pleb.c | 2 +- arch/arm/mach-sa1100/shannon.c | 2 +- arch/arm/mach-sa1100/simpad.c | 2 +- 14 files changed, 13 insertions(+), 44 deletions(-) delete mode 100644 arch/arm/include/asm/mach/serial_sa1100.h (limited to 'arch') diff --git a/arch/arm/include/asm/mach/serial_sa1100.h b/arch/arm/include/asm/mach/serial_sa1100.h deleted file mode 100644 index d09064bf95a..00000000000 --- a/arch/arm/include/asm/mach/serial_sa1100.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * arch/arm/include/asm/mach/serial_sa1100.h - * - * Author: Nicolas Pitre - * - * Moved and changed lots, Russell King - * - * Low level machine dependent UART functions. - */ - -struct uart_port; -struct uart_info; - -/* - * This is a temporary structure for registering these - * functions; it is intended to be discarded after boot. 
- */ -struct sa1100_port_fns { - void (*set_mctrl)(struct uart_port *, u_int); - u_int (*get_mctrl)(struct uart_port *); - void (*pm)(struct uart_port *, u_int, u_int); - int (*set_wake)(struct uart_port *, u_int); -}; - -#ifdef CONFIG_SERIAL_SA1100 -void sa1100_register_uart_fns(struct sa1100_port_fns *fns); -void sa1100_register_uart(int idx, int port); -#else -#define sa1100_register_uart_fns(fns) do { } while (0) -#define sa1100_register_uart(idx,port) do { } while (0) -#endif diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 6a7ad3c2a3f..9a23739f702 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -37,7 +38,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-sa1100/badge4.c b/arch/arm/mach-sa1100/badge4.c index 038df4894b0..b2dadf3ea3d 100644 --- a/arch/arm/mach-sa1100/badge4.c +++ b/arch/arm/mach-sa1100/badge4.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c index ad0eb08ea07..304bca4a07c 100644 --- a/arch/arm/mach-sa1100/cerf.c +++ b/arch/arm/mach-sa1100/cerf.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index 170cb6107f6..45f424f5fca 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -40,7 +41,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c index 63150e1ffe9..f17e7382242 100644 --- a/arch/arm/mach-sa1100/h3xxx.c +++ b/arch/arm/mach-sa1100/h3xxx.c @@ -17,12 +17,12 @@ #include #include #include +#include #include #include #include #include -#include #include diff --git a/arch/arm/mach-sa1100/hackkit.c b/arch/arm/mach-sa1100/hackkit.c index fc106aab7c7..d005939c41f 100644 --- a/arch/arm/mach-sa1100/hackkit.c +++ b/arch/arm/mach-sa1100/hackkit.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-sa1100/jornada720.c b/arch/arm/mach-sa1100/jornada720.c index e3084f47027..35cfc428b4d 100644 --- a/arch/arm/mach-sa1100/jornada720.c +++ b/arch/arm/mach-sa1100/jornada720.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c index 3048b17e84c..f69f78fc3dd 100644 --- a/arch/arm/mach-sa1100/lart.c +++ b/arch/arm/mach-sa1100/lart.c @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include -#include #include #include diff --git a/arch/arm/mach-sa1100/nanoengine.c b/arch/arm/mach-sa1100/nanoengine.c index 41f69d97066..102e08f7b10 100644 --- a/arch/arm/mach-sa1100/nanoengine.c +++ b/arch/arm/mach-sa1100/nanoengine.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -24,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-sa1100/neponset.c 
b/arch/arm/mach-sa1100/neponset.c index 266db873a4e..88be0474f3d 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -14,7 +15,6 @@ #include #include -#include #include #include diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c index 37fe0a0a536..c51bb63f90f 100644 --- a/arch/arm/mach-sa1100/pleb.c +++ b/arch/arm/mach-sa1100/pleb.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include #include #include "generic.h" diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c index ff6b7b35bca..6460d25fbb8 100644 --- a/arch/arm/mach-sa1100/shannon.c +++ b/arch/arm/mach-sa1100/shannon.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index 71790e581d9..6d65f65fcb2 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -23,7 +24,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 46000065a631c6d21452d533baf086175c384ba4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Sep 2012 10:23:44 +0100 Subject: ARM: move udc_pxa2xx.h to linux/platform_data Move the PXA2xx/IXP4xx UDC header file into linux/platform_data as it only contains a driver platform data structure. Acked-by: Felipe Balbi Acked-by: Greg Kroah-Hartman Acked-by: Krzysztof Halasa Signed-off-by: Russell King --- arch/arm/include/asm/mach/udc_pxa2xx.h | 26 -------------------------- arch/arm/mach-ixp4xx/include/mach/udc.h | 2 +- arch/arm/mach-pxa/include/mach/udc.h | 2 +- 3 files changed, 2 insertions(+), 28 deletions(-) delete mode 100644 arch/arm/include/asm/mach/udc_pxa2xx.h (limited to 'arch') diff --git a/arch/arm/include/asm/mach/udc_pxa2xx.h b/arch/arm/include/asm/mach/udc_pxa2xx.h deleted file mode 100644 index ea297ac70bc..00000000000 --- a/arch/arm/include/asm/mach/udc_pxa2xx.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * arch/arm/include/asm/mach/udc_pxa2xx.h - * - * This supports machine-specific differences in how the PXA2xx - * USB Device Controller (UDC) is wired. - * - * It is set in linux/arch/arm/mach-pxa/.c or in - * linux/arch/mach-ixp4xx/.c and used in - * the probe routine of linux/drivers/usb/gadget/pxa2xx_udc.c - */ - -struct pxa2xx_udc_mach_info { - int (*udc_is_connected)(void); /* do we see host? */ - void (*udc_command)(int cmd); -#define PXA2XX_UDC_CMD_CONNECT 0 /* let host see us */ -#define PXA2XX_UDC_CMD_DISCONNECT 1 /* so host won't see us */ - - /* Boards following the design guidelines in the developer's manual, - * with on-chip GPIOs not Lubbock's weird hardware, can have a sane - * VBUS IRQ and omit the methods above. Store the GPIO number - * here. Note that sometimes the signals go through inverters... 
- */ - bool gpio_pullup_inverted; - int gpio_pullup; /* high == pullup activated */ -}; - diff --git a/arch/arm/mach-ixp4xx/include/mach/udc.h b/arch/arm/mach-ixp4xx/include/mach/udc.h index 80d6da2eafa..7bd8b96c884 100644 --- a/arch/arm/mach-ixp4xx/include/mach/udc.h +++ b/arch/arm/mach-ixp4xx/include/mach/udc.h @@ -2,7 +2,7 @@ * arch/arm/mach-ixp4xx/include/mach/udc.h * */ -#include +#include extern void ixp4xx_set_udc_info(struct pxa2xx_udc_mach_info *info); diff --git a/arch/arm/mach-pxa/include/mach/udc.h b/arch/arm/mach-pxa/include/mach/udc.h index 2f82332e81a..9a827e32db9 100644 --- a/arch/arm/mach-pxa/include/mach/udc.h +++ b/arch/arm/mach-pxa/include/mach/udc.h @@ -2,7 +2,7 @@ * arch/arm/mach-pxa/include/mach/udc.h * */ -#include +#include extern void pxa_set_udc_info(struct pxa2xx_udc_mach_info *info); -- cgit v1.2.3 From 95e629b761ce36996d1befe2824d5346b5a220b9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Sep 2012 10:26:40 +0100 Subject: ARM/AVR32: get rid of serial_at91.h The definitions provided by serial_at91.h are only used by the atmel_serial driver, and the function that uses it is never called from anywhere in the kernel. Therefore, these definitions are unused and/or obsolete, and can be removed. Acked-by: Greg Kroah-Hartman Acked-by: Jean-Christophe PLAGNIOL-VILLARD Acked-by: Nicolas Ferre Signed-off-by: Russell King --- arch/arm/include/asm/mach/serial_at91.h | 33 ------------------------------- arch/avr32/include/asm/mach/serial_at91.h | 33 ------------------------------- 2 files changed, 66 deletions(-) delete mode 100644 arch/arm/include/asm/mach/serial_at91.h delete mode 100644 arch/avr32/include/asm/mach/serial_at91.h (limited to 'arch') diff --git a/arch/arm/include/asm/mach/serial_at91.h b/arch/arm/include/asm/mach/serial_at91.h deleted file mode 100644 index ea6d063923b..00000000000 --- a/arch/arm/include/asm/mach/serial_at91.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * arch/arm/include/asm/mach/serial_at91.h - * - * Based on serial_sa1100.h by Nicolas Pitre - * - * Copyright (C) 2002 ATMEL Rousset - * - * Low level machine dependent UART functions. - */ - -struct uart_port; - -/* - * This is a temporary structure for registering these - * functions; it is intended to be discarded after boot. - */ -struct atmel_port_fns { - void (*set_mctrl)(struct uart_port *, u_int); - u_int (*get_mctrl)(struct uart_port *); - void (*enable_ms)(struct uart_port *); - void (*pm)(struct uart_port *, u_int, u_int); - int (*set_wake)(struct uart_port *, u_int); - int (*open)(struct uart_port *); - void (*close)(struct uart_port *); -}; - -#if defined(CONFIG_SERIAL_ATMEL) -void atmel_register_uart_fns(struct atmel_port_fns *fns); -#else -#define atmel_register_uart_fns(fns) do { } while (0) -#endif - - diff --git a/arch/avr32/include/asm/mach/serial_at91.h b/arch/avr32/include/asm/mach/serial_at91.h deleted file mode 100644 index 55b317a8906..00000000000 --- a/arch/avr32/include/asm/mach/serial_at91.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * linux/include/asm-arm/mach/serial_at91.h - * - * Based on serial_sa1100.h by Nicolas Pitre - * - * Copyright (C) 2002 ATMEL Rousset - * - * Low level machine dependent UART functions. - */ - -struct uart_port; - -/* - * This is a temporary structure for registering these - * functions; it is intended to be discarded after boot. 
- */ -struct atmel_port_fns { - void (*set_mctrl)(struct uart_port *, u_int); - u_int (*get_mctrl)(struct uart_port *); - void (*enable_ms)(struct uart_port *); - void (*pm)(struct uart_port *, u_int, u_int); - int (*set_wake)(struct uart_port *, u_int); - int (*open)(struct uart_port *); - void (*close)(struct uart_port *); -}; - -#if defined(CONFIG_SERIAL_ATMEL) -void atmel_register_uart_fns(struct atmel_port_fns *fns); -#else -#define atmel_register_uart_fns(fns) do { } while (0) -#endif - - -- cgit v1.2.3 From 1f59d13bee172945ccdfbc5018477ba94a0ac28e Mon Sep 17 00:00:00 2001 From: Will Drewry Date: Thu, 15 Nov 2012 22:11:29 +0100 Subject: ARM: 7577/1: arch/add syscall_get_arch Provide an ARM implementation of syscall_get_arch. This is a pre-requisite for CONFIG_HAVE_ARCH_SECCOMP_FILTER. Signed-off-by: Will Drewry Signed-off-by: Kees Cook Reviewed-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/syscall.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index 9fdded6b108..f1d96d4e809 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -7,6 +7,8 @@ #ifndef _ASM_ARM_SYSCALL_H #define _ASM_ARM_SYSCALL_H +#include /* for AUDIT_ARCH_* */ +#include /* for ELF_EM */ #include #include @@ -95,4 +97,11 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->ARM_r0 + i, args, n * sizeof(args[0])); } +static inline int syscall_get_arch(struct task_struct *task, + struct pt_regs *regs) +{ + /* ARM tasks don't change audit architectures on the fly. */ + return AUDIT_ARCH_ARM; +} + #endif /* _ASM_ARM_SYSCALL_H */ -- cgit v1.2.3 From 9b790d71d58be65f9508ab60920eb978af828412 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 15 Nov 2012 22:12:00 +0100 Subject: ARM: 7578/1: arch/move secure_computing into trace There is very little difference in the TIF_SECCOMP and TIF_SYSCALL_WORK path in entry-common.S, so merge TIF_SECCOMP into TIF_SYSCALL_WORK and move seccomp into the syscall_trace_enter() handler. Expanded some of the tracehook logic into the callers to make this code more readable. Since tracehook needs to do register changing, this portion is best left in its own function instead of copy/pasting into the callers. Additionally, the return value for secure_computing() is now checked and a -1 value will result in the system call being skipped. 
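For context, a minimal userspace sketch of the facility this series enables (illustrative, not part of the patch): install a BPF filter that kills the task unless the reported audit arch matches AUDIT_ARCH_ARM, the value now returned by syscall_get_arch(), and otherwise allows the call.

#include <stddef.h>
#include <sys/prctl.h>
#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

static int install_arm_only_filter(void)
{
	struct sock_filter insns[] = {
		/* Check the audit arch reported for the syscall. */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, arch)),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_ARM, 1, 0),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return -1;
	return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
}
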
Signed-off-by: Kees Cook Acked-by: Will Drewry Reviewed-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/thread_info.h | 7 ++++--- arch/arm/kernel/entry-common.S | 10 ---------- arch/arm/kernel/ptrace.c | 29 ++++++++++++++++++++--------- 3 files changed, 24 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 8477b4c1d39..cddda1f41f0 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -151,10 +151,10 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 #define TIF_SYSCALL_TRACEPOINT 10 +#define TIF_SECCOMP 11 /* seccomp syscall filtering active */ #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 20 -#define TIF_SECCOMP 21 #define TIF_SWITCH_MM 22 /* deferred switch_mm */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -163,11 +163,12 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) -#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) /* Checks for any syscall work in entry-common.S */ -#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SYSCALL_TRACEPOINT) +#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) /* * Change these and you break ASM code in entry-common.S diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 417bac1846b..b621871dd27 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -417,16 +417,6 @@ local_restart: ldr r10, [tsk, #TI_FLAGS] @ check for syscall tracing stmdb sp!, {r4, r5} @ push fifth and sixth args -#ifdef CONFIG_SECCOMP - tst r10, #_TIF_SECCOMP - beq 1f - mov r0, scno - bl __secure_computing - add r0, sp, #S_R0 + S_OFF @ pointer to regs - ldmia r0, {r0 - r3} @ have to reload r0 - r3 -1: -#endif - tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls? bne __sys_trace diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 739db3a1b2d..518536d93fb 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -916,16 +916,11 @@ enum ptrace_syscall_dir { PTRACE_SYSCALL_EXIT, }; -static int ptrace_syscall_trace(struct pt_regs *regs, int scno, - enum ptrace_syscall_dir dir) +static int tracehook_report_syscall(struct pt_regs *regs, + enum ptrace_syscall_dir dir) { unsigned long ip; - current_thread_info()->syscall = scno; - - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return scno; - /* * IP is used to denote syscall entry/exit: * IP = 0 -> entry, =1 -> exit @@ -944,19 +939,35 @@ static int ptrace_syscall_trace(struct pt_regs *regs, int scno, asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) { - scno = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_ENTER); + current_thread_info()->syscall = scno; + + /* Do the secure computing check first; failures should be fast. 
*/ + if (secure_computing(scno) == -1) + return -1; + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + scno = tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, scno); + audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); + return scno; } asmlinkage int syscall_trace_exit(struct pt_regs *regs, int scno) { - scno = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_EXIT); + current_thread_info()->syscall = scno; + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + scno = tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_exit(regs, scno); + audit_syscall_exit(regs); + return scno; } -- cgit v1.2.3 From ad75b51459ae076a0d406391496f81b897bf6992 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 15 Nov 2012 22:12:17 +0100 Subject: ARM: 7579/1: arch/allow a scno of -1 to not cause a SIGILL On tracehook-friendly platforms, a system call number of -1 falls through without running much code or taking much action. ARM is different. This adds a short-circuit check in the trace path to avoid any additional work, as suggested by Russell King, to make sure that ARM behaves the same way as other platforms. Signed-off-by: Kees Cook Acked-by: Will Drewry Reviewed-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index b621871dd27..ee81dbc6fa1 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -448,7 +448,10 @@ __sys_trace: ldmccia r1, {r0 - r6} @ have to reload r0 - r6 stmccia sp, {r4, r5} @ and update the stack args ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine - b 2b + cmp scno, #-1 @ skip the syscall? + bne 2b + add sp, sp, #S_OFF @ restore stack + b ret_slow_syscall __sys_trace_return: str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 -- cgit v1.2.3 From 4095ccc39ed91714d3b5172f16a9aebf30bbbea9 Mon Sep 17 00:00:00 2001 From: Will Drewry Date: Thu, 15 Nov 2012 22:12:29 +0100 Subject: ARM: 7580/1: arch/select HAVE_ARCH_SECCOMP_FILTER Reflect architectural support for seccomp filter. Signed-off-by: Will Drewry Signed-off-by: Kees Cook Reviewed-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 208414c0506..a40e5f6abda 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -22,6 +22,7 @@ config ARM select HAVE_AOUT select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT select HAVE_C_RECORDMCOUNT -- cgit v1.2.3 From e8d432c9cf0a3d569b2d00877d12c9ffe9e55286 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 6 Nov 2012 11:57:43 +0000 Subject: ARM: kernel: add MIDR to per-CPU information data The advent of big.LITTLE ARM platforms requires the kernel to be able to identify the MIDRs of all online CPUs upon request. MIDRs are stashed at boot time so that kernel subsystems can detect the MIDR of online CPUs by simply retrieving per-CPU data updated by all booted CPUs. 
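A hypothetical consumer (sketch only) can then report the MIDR of every online CPU without cross-calling, for example when identifying big.LITTLE core types:

static void dump_online_midrs(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		pr_info("CPU%d: MIDR 0x%08x\n", cpu,
			per_cpu(cpu_data, cpu).cpuid);
}
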
Signed-off-by: Lorenzo Pieralisi Acked-by: Nicolas Pitre --- arch/arm/include/asm/cpu.h | 1 + arch/arm/kernel/smp.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/cpu.h b/arch/arm/include/asm/cpu.h index d797223b39d..2744f060255 100644 --- a/arch/arm/include/asm/cpu.h +++ b/arch/arm/include/asm/cpu.h @@ -15,6 +15,7 @@ struct cpuinfo_arm { struct cpu cpu; + u32 cpuid; #ifdef CONFIG_SMP unsigned int loops_per_jiffy; #endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index fbc8b2623d8..7eacd84cdc9 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -281,6 +281,7 @@ static void __cpuinit smp_store_cpu_info(unsigned int cpuid) struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid); cpu_info->loops_per_jiffy = loops_per_jiffy; + cpu_info->cpuid = read_cpuid_id(); store_cpu_topology(cpuid); } -- cgit v1.2.3 From b4b8f770eb10a1bccaf8aa0ec1956e2dd7ed1e0a Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 10 Sep 2012 18:55:21 +0100 Subject: ARM: kernel: update cpuinfo to print all online CPUs features Currently, reading /proc/cpuinfo provides userspace with CPU ID of the CPU carrying out the read from the file. This is fine as long as all CPUs in the system are the same. With the advent of big.LITTLE and heterogenous ARM systems this approach provides user space with incorrect bits of information since CPU ids in the system might differ from the one provided by the CPU reading the file. This patch updates the cpuinfo show function so that a read from /proc/cpuinfo prints HW information for all online CPUs at once, mirroring x86 behaviour. Signed-off-by: Lorenzo Pieralisi Acked-by: Nicolas Pitre --- arch/arm/kernel/setup.c | 70 ++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index da1d1aa20ad..a15848f8b0f 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -841,12 +841,9 @@ static const char *hwcap_str[] = { static int c_show(struct seq_file *m, void *v) { - int i; - - seq_printf(m, "Processor\t: %s rev %d (%s)\n", - cpu_name, read_cpuid_id() & 15, elf_platform); + int i, j; + u32 cpuid; -#if defined(CONFIG_SMP) for_each_online_cpu(i) { /* * glibc reads /proc/cpuinfo to determine the number of @@ -854,45 +851,48 @@ static int c_show(struct seq_file *m, void *v) * "processor". Give glibc what it expects. */ seq_printf(m, "processor\t: %d\n", i); - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n", + cpuid = is_smp() ? 
per_cpu(cpu_data, i).cpuid : read_cpuid_id(); + seq_printf(m, "model name\t: %s rev %d (%s)\n", + cpu_name, cpuid & 15, elf_platform); + +#if defined(CONFIG_SMP) + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", per_cpu(cpu_data, i).loops_per_jiffy / (500000UL/HZ), (per_cpu(cpu_data, i).loops_per_jiffy / (5000UL/HZ)) % 100); - } -#else /* CONFIG_SMP */ - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", - loops_per_jiffy / (500000/HZ), - (loops_per_jiffy / (5000/HZ)) % 100); +#else + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + loops_per_jiffy / (500000/HZ), + (loops_per_jiffy / (5000/HZ)) % 100); #endif + /* dump out the processor features */ + seq_puts(m, "Features\t: "); - /* dump out the processor features */ - seq_puts(m, "Features\t: "); - - for (i = 0; hwcap_str[i]; i++) - if (elf_hwcap & (1 << i)) - seq_printf(m, "%s ", hwcap_str[i]); + for (j = 0; hwcap_str[j]; j++) + if (elf_hwcap & (1 << j)) + seq_printf(m, "%s ", hwcap_str[j]); - seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); - seq_printf(m, "CPU architecture: %s\n", proc_arch[cpu_architecture()]); + seq_printf(m, "\nCPU implementer\t: 0x%02x\n", cpuid >> 24); + seq_printf(m, "CPU architecture: %s\n", + proc_arch[cpu_architecture()]); - if ((read_cpuid_id() & 0x0008f000) == 0x00000000) { - /* pre-ARM7 */ - seq_printf(m, "CPU part\t: %07x\n", read_cpuid_id() >> 4); - } else { - if ((read_cpuid_id() & 0x0008f000) == 0x00007000) { - /* ARM7 */ - seq_printf(m, "CPU variant\t: 0x%02x\n", - (read_cpuid_id() >> 16) & 127); + if ((cpuid & 0x0008f000) == 0x00000000) { + /* pre-ARM7 */ + seq_printf(m, "CPU part\t: %07x\n", cpuid >> 4); } else { - /* post-ARM7 */ - seq_printf(m, "CPU variant\t: 0x%x\n", - (read_cpuid_id() >> 20) & 15); + if ((cpuid & 0x0008f000) == 0x00007000) { + /* ARM7 */ + seq_printf(m, "CPU variant\t: 0x%02x\n", + (cpuid >> 16) & 127); + } else { + /* post-ARM7 */ + seq_printf(m, "CPU variant\t: 0x%x\n", + (cpuid >> 20) & 15); + } + seq_printf(m, "CPU part\t: 0x%03x\n", + (cpuid >> 4) & 0xfff); } - seq_printf(m, "CPU part\t: 0x%03x\n", - (read_cpuid_id() >> 4) & 0xfff); + seq_printf(m, "CPU revision\t: %d\n\n", cpuid & 15); } - seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); - - seq_puts(m, "\n"); seq_printf(m, "Hardware\t: %s\n", machine_name); seq_printf(m, "Revision\t: %04x\n", system_rev); -- cgit v1.2.3 From dca463daa0151c5bbbd8ec8fd42882a3966d3c44 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 15 Nov 2012 17:30:32 +0000 Subject: ARM: kernel: enhance MPIDR macro definitions Kernel subsystems other than the topology layer need the MPIDR mask definitions to access the MPIDR without relying on hardcoded masks. This patch moves the MPIDR register masks definition to a header file and defines a macro to simplify access to MPIDR bit fields representing affinity levels. 
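As a worked example with an illustrative value: an MPIDR of 0x80000102 decodes with the new macro as Aff0 = 0x02, Aff1 = 0x01, Aff2 = 0x00, i.e. CPU 2 within cluster 1.

	u32 mpidr   = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
	u32 cpu     = MPIDR_AFFINITY_LEVEL(mpidr, 0);	/* 0x02 for 0x80000102 */
	u32 cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);	/* 0x01 */
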
Signed-off-by: Lorenzo Pieralisi Acked-by: Will Deacon Acked-by: Nicolas Pitre --- arch/arm/include/asm/cputype.h | 13 +++++++++++++ arch/arm/kernel/topology.c | 27 +-------------------------- 2 files changed, 14 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index cb47d28cbe1..a59dcb5ab5f 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -25,6 +25,19 @@ #define CPUID_EXT_ISAR4 "c2, 4" #define CPUID_EXT_ISAR5 "c2, 5" +#define MPIDR_SMP_BITMASK (0x3 << 30) +#define MPIDR_SMP_VALUE (0x2 << 30) + +#define MPIDR_MT_BITMASK (0x1 << 24) + +#define MPIDR_HWID_BITMASK 0xFFFFFF + +#define MPIDR_LEVEL_BITS 8 +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1) + +#define MPIDR_AFFINITY_LEVEL(mpidr, level) \ + ((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK) + extern unsigned int processor_id; #ifdef CONFIG_CPU_CP15 diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 26c12c6440f..cd68d1aa9c3 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -196,32 +196,7 @@ static inline void parse_dt_topology(void) {} static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {} #endif - -/* - * cpu topology management - */ - -#define MPIDR_SMP_BITMASK (0x3 << 30) -#define MPIDR_SMP_VALUE (0x2 << 30) - -#define MPIDR_MT_BITMASK (0x1 << 24) - -/* - * These masks reflect the current use of the affinity levels. - * The affinity level can be up to 16 bits according to ARM ARM - */ -#define MPIDR_HWID_BITMASK 0xFFFFFF - -#define MPIDR_LEVEL0_MASK 0x3 -#define MPIDR_LEVEL0_SHIFT 0 - -#define MPIDR_LEVEL1_MASK 0xF -#define MPIDR_LEVEL1_SHIFT 8 - -#define MPIDR_LEVEL2_MASK 0xFF -#define MPIDR_LEVEL2_SHIFT 16 - -/* + /* * cpu topology table */ struct cputopo_arm cpu_topology[NR_CPUS]; -- cgit v1.2.3 From 71db5bfec1349afcbfbd71268c01c658c357b4f3 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 16 Nov 2012 15:24:06 +0000 Subject: ARM: kernel: update topology to use new MPIDR macros This patch updates the topology initialization code to use the newly defined accessors to retrieve the MPIDR affinity levels. 
Signed-off-by: Lorenzo Pieralisi Acked-by: Will Deacon Acked-by: Nicolas Pitre --- arch/arm/kernel/topology.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index cd68d1aa9c3..79282ebcd93 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -257,19 +257,14 @@ void store_cpu_topology(unsigned int cpuid) if (mpidr & MPIDR_MT_BITMASK) { /* core performance interdependency */ - cpuid_topo->thread_id = (mpidr >> MPIDR_LEVEL0_SHIFT) - & MPIDR_LEVEL0_MASK; - cpuid_topo->core_id = (mpidr >> MPIDR_LEVEL1_SHIFT) - & MPIDR_LEVEL1_MASK; - cpuid_topo->socket_id = (mpidr >> MPIDR_LEVEL2_SHIFT) - & MPIDR_LEVEL2_MASK; + cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); } else { /* largely independent cores */ cpuid_topo->thread_id = -1; - cpuid_topo->core_id = (mpidr >> MPIDR_LEVEL0_SHIFT) - & MPIDR_LEVEL0_MASK; - cpuid_topo->socket_id = (mpidr >> MPIDR_LEVEL1_SHIFT) - & MPIDR_LEVEL1_MASK; + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); } } else { /* -- cgit v1.2.3 From cb8cf4f821044f140ea5b9c8d4f816f0c05fab44 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 8 Nov 2012 18:05:56 +0000 Subject: ARM: kernel: smp_setup_processor_id() updates This patch applies some basic changes to the smp_setup_processor_id() ARM implementation to make the code that builds cpu_logical_map more uniform across the kernel. The function now prints the full extent of the boot CPU MPIDR[23:0] and initializes the cpu_logical_map for CPUs up to nr_cpu_ids. Signed-off-by: Lorenzo Pieralisi Acked-by: Nicolas Pitre Acked-by: Will Deacon --- arch/arm/kernel/setup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index da1d1aa20ad..4515bf6abee 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -426,13 +426,14 @@ int __cpu_logical_map[NR_CPUS]; void __init smp_setup_processor_id(void) { int i; - u32 cpu = is_smp() ? read_cpuid_mpidr() & 0xff : 0; + u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0; + u32 cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cpu_logical_map(0) = cpu; - for (i = 1; i < NR_CPUS; ++i) + for (i = 1; i < nr_cpu_ids; ++i) cpu_logical_map(i) = i == cpu ? 0 : i; - printk(KERN_INFO "Booting Linux on physical CPU %d\n", cpu); + printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr); } static void __init setup_processor(void) -- cgit v1.2.3 From a0ae02405076ac32bd17ece976e914b5b6075bb0 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 17 Nov 2011 17:31:51 +0000 Subject: ARM: kernel: add device tree init map function When booting through a device tree, the kernel cpu logical id map can be initialized using device tree data passed by FW or through an embedded blob. This patch adds a function that parses device tree "cpu" nodes and retrieves the corresponding CPUs hardware identifiers (MPIDR). It sets the possible cpus and the cpu logical map values according to the number of CPUs defined in the device tree and respective properties. The device tree HW identifiers are considered valid if all CPU nodes contain a "reg" property, there are no duplicate "reg" entries and the DT defines a CPU node whose "reg" property matches the MPIDR[23:0] of the boot CPU. 
The primary CPU is assigned cpu logical number 0 to keep the current convention valid. Current bindings documentation is included in the patch: Documentation/devicetree/bindings/arm/cpus.txt Signed-off-by: Lorenzo Pieralisi Acked-by: Nicolas Pitre --- arch/arm/include/asm/prom.h | 2 + arch/arm/kernel/devtree.c | 100 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h index aeae9c609df..8dd51dc1a36 100644 --- a/arch/arm/include/asm/prom.h +++ b/arch/arm/include/asm/prom.h @@ -15,6 +15,7 @@ extern struct machine_desc *setup_machine_fdt(unsigned int dt_phys); extern void arm_dt_memblock_reserve(void); +extern void __init arm_dt_init_cpu_maps(void); #else /* CONFIG_OF */ @@ -24,6 +25,7 @@ static inline struct machine_desc *setup_machine_fdt(unsigned int dt_phys) } static inline void arm_dt_memblock_reserve(void) { } +static inline void arm_dt_init_cpu_maps(void) { } #endif /* CONFIG_OF */ #endif /* ASMARM_PROM_H */ diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index bee7f9d47f0..aaf9add497f 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -19,8 +19,10 @@ #include #include +#include #include #include +#include #include #include @@ -61,6 +63,104 @@ void __init arm_dt_memblock_reserve(void) } } +/* + * arm_dt_init_cpu_maps - Function retrieves cpu nodes from the device tree + * and builds the cpu logical map array containing MPIDR values related to + * logical cpus + * + * Updates the cpu possible mask with the number of parsed cpu nodes + */ +void __init arm_dt_init_cpu_maps(void) +{ + /* + * Temp logical map is initialized with UINT_MAX values that are + * considered invalid logical map entries since the logical map must + * contain a list of MPIDR[23:0] values where MPIDR[31:24] must + * read as 0. + */ + struct device_node *cpu, *cpus; + u32 i, j, cpuidx = 1; + u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0; + + u32 tmp_map[NR_CPUS] = { [0 ... NR_CPUS-1] = UINT_MAX }; + bool bootcpu_valid = false; + cpus = of_find_node_by_path("/cpus"); + + if (!cpus) + return; + + for_each_child_of_node(cpus, cpu) { + u32 hwid; + + pr_debug(" * %s...\n", cpu->full_name); + /* + * A device tree containing CPU nodes with missing "reg" + * properties is considered invalid to build the + * cpu_logical_map. + */ + if (of_property_read_u32(cpu, "reg", &hwid)) { + pr_debug(" * %s missing reg property\n", + cpu->full_name); + return; + } + + /* + * 8 MSBs must be set to 0 in the DT since the reg property + * defines the MPIDR[23:0]. + */ + if (hwid & ~MPIDR_HWID_BITMASK) + return; + + /* + * Duplicate MPIDRs are a recipe for disaster. + * Scan all initialized entries and check for + * duplicates. If any is found just bail out. + * temp values were initialized to UINT_MAX + * to avoid matching valid MPIDR[23:0] values. + */ + for (j = 0; j < cpuidx; j++) + if (WARN(tmp_map[j] == hwid, "Duplicate /cpu reg " + "properties in the DT\n")) + return; + + /* + * Build a stashed array of MPIDR values. Numbering scheme + * requires that if detected the boot CPU must be assigned + * logical id 0. Other CPUs get sequential indexes starting + * from 1. If a CPU node with a reg property matching the + * boot CPU MPIDR is detected, this is recorded so that the + * logical map built from DT is validated and can be used + * to override the map created in smp_setup_processor_id(). 
+ */ + if (hwid == mpidr) { + i = 0; + bootcpu_valid = true; + } else { + i = cpuidx++; + } + + tmp_map[i] = hwid; + + if (cpuidx > nr_cpu_ids) + break; + } + + if (WARN(!bootcpu_valid, "DT missing boot CPU MPIDR[23:0], " + "fall back to default cpu_logical_map\n")) + return; + + /* + * Since the boot CPU node contains proper data, and all nodes have + * a reg property, the DT CPU list can be considered valid and the + * logical map created in smp_setup_processor_id() can be overridden + */ + for (i = 0; i < cpuidx; i++) { + set_cpu_possible(i, true); + cpu_logical_map(i) = tmp_map[i]; + pr_debug("cpu logical map 0x%x\n", cpu_logical_map(i)); + } +} + /** * setup_machine_fdt - Machine setup when an dtb was passed to the kernel * @dt_phys: physical address of dt blob -- cgit v1.2.3 From 5587164eea4aad88fcb79d9b21dc8f14fea598cd Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 14 Dec 2011 16:01:24 +0000 Subject: ARM: kernel: add cpu logical map DT init in setup_arch As soon as the device tree is unflattened the cpu logical to physical mapping is carried out in setup_arch to build a proper array of MPIDR and corresponding logical indexes. The mapping could have been carried out using the flattened DT blob and related primitives, but since the mapping is not needed by early boot code it can safely be executed when the device tree has been uncompressed to its tree data structure. This patch adds the arm_dt_init_cpu maps() function call in setup_arch(). If the kernel is not compiled with DT support the function is empty and no logical mapping takes place through it; the mapping carried out in smp_setup_processor_id() is left unchanged. If DT is supported the mapping created in smp_setup_processor_id() is overriden. The DT mapping also sets the possible cpus mask, hence platform code need not set it again in the respective smp_init_cpus() functions. Signed-off-by: Lorenzo Pieralisi Acked-by: Will Deacon Acked-by: Nicolas Pitre --- arch/arm/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 4515bf6abee..d15f1c503f3 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -759,6 +759,7 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); + arm_dt_init_cpu_maps(); #ifdef CONFIG_SMP if (is_smp()) { smp_set_ops(mdesc->smp); -- cgit v1.2.3 From 7f124aaf01439d2fa54283f3c375ce3b9fc776d4 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 17 Nov 2011 17:36:24 +0000 Subject: ARM: kernel: add logical mappings look-up In ARM SMP systems the MPIDR register ([23:0] bits) is used to uniquely identify CPUs. In order to retrieve the logical CPU index corresponding to a given MPIDR value and guarantee a consistent translation throughout the kernel, this patch adds a look-up based on the MPIDR[23:0] so that kernel subsystems can use it whenever the logical cpu index corresponding to a given MPIDR value is needed. 
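To illustrate the intended use, a hypothetical caller is sketched here; it is not taken from the patch, and get_logical_index() is the helper added in the smp_plat.h hunk below:

    /*
     * Hypothetical example: translate an MPIDR value received from
     * firmware into a logical CPU index before touching per-cpu state.
     */
    static int mpidr_to_cpu(u32 mpidr)
    {
            int cpu = get_logical_index(mpidr & MPIDR_HWID_BITMASK);

            if (cpu < 0)
                    pr_warn("no logical cpu for MPIDR 0x%x\n", mpidr);

            return cpu;     /* logical index, or -EINVAL */
    }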
Signed-off-by: Lorenzo Pieralisi Acked-by: Will Deacon Acked-by: Nicolas Pitre --- arch/arm/include/asm/smp_plat.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 558d6c80aca..aaa61b6f50f 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -5,6 +5,9 @@ #ifndef __ASMARM_SMP_PLAT_H #define __ASMARM_SMP_PLAT_H +#include +#include + #include /* @@ -48,5 +51,19 @@ static inline int cache_ops_need_broadcast(void) */ extern int __cpu_logical_map[]; #define cpu_logical_map(cpu) __cpu_logical_map[cpu] +/* + * Retrieve logical cpu index corresponding to a given MPIDR[23:0] + * - mpidr: MPIDR[23:0] to be used for the look-up + * + * Returns the cpu logical index or -EINVAL on look-up error + */ +static inline int get_logical_index(u32 mpidr) +{ + int cpu; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (cpu_logical_map(cpu) == mpidr) + return cpu; + return -EINVAL; +} #endif -- cgit v1.2.3 From 384a290283fde63ba8dc671fca5420111cdac19a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 11 Apr 2012 18:55:48 -0400 Subject: ARM: gic: use a private mapping for CPU target interfaces The GIC interface numbering does not necessarily follow the logical CPU numbering, especially for complex topologies such as multi-cluster systems. Fortunately we can easily probe the GIC to create a mapping as the Interrupt Processor Targets Registers for the first 32 interrupts are read-only, and each field returns a value that always corresponds to the processor reading the register. Initially all mappings target all CPUs in case an IPI is required to boot secondary CPUs. It is refined as those CPUs discover what their actual mapping is. Signed-off-by: Nicolas Pitre Acked-by: Will Deacon --- arch/arm/common/gic.c | 45 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index aa526998418..36ae03a3f5d 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -69,6 +69,14 @@ struct gic_chip_data { static DEFINE_RAW_SPINLOCK(irq_controller_lock); +/* + * The GIC mapping of CPU interfaces does not necessarily match + * the logical CPU numbering. Let's use a mapping as returned + * by the GIC itself. + */ +#define NR_GIC_CPU_IF 8 +static u8 gic_cpu_map[NR_GIC_CPU_IF] __read_mostly; + /* * Supported arch specific GIC irq extension. * Default make them NULL. @@ -238,11 +246,11 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask); u32 val, mask, bit; - if (cpu >= 8 || cpu >= nr_cpu_ids) + if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) return -EINVAL; mask = 0xff << shift; - bit = 1 << (cpu_logical_map(cpu) + shift); + bit = gic_cpu_map[cpu] << shift; raw_spin_lock(&irq_controller_lock); val = readl_relaxed(reg) & ~mask; @@ -349,11 +357,6 @@ static void __init gic_dist_init(struct gic_chip_data *gic) u32 cpumask; unsigned int gic_irqs = gic->gic_irqs; void __iomem *base = gic_data_dist_base(gic); - u32 cpu = cpu_logical_map(smp_processor_id()); - - cpumask = 1 << cpu; - cpumask |= cpumask << 8; - cpumask |= cpumask << 16; writel_relaxed(0, base + GIC_DIST_CTRL); @@ -366,6 +369,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic) /* * Set all global interrupts to this CPU only. 
*/ + cpumask = readl_relaxed(base + GIC_DIST_TARGET + 0); for (i = 32; i < gic_irqs; i += 4) writel_relaxed(cpumask, base + GIC_DIST_TARGET + i * 4 / 4); @@ -389,8 +393,24 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic) { void __iomem *dist_base = gic_data_dist_base(gic); void __iomem *base = gic_data_cpu_base(gic); + unsigned int cpu_mask, cpu = smp_processor_id(); int i; + /* + * Get what the GIC says our CPU mask is. + */ + BUG_ON(cpu >= NR_GIC_CPU_IF); + cpu_mask = readl_relaxed(dist_base + GIC_DIST_TARGET + 0); + gic_cpu_map[cpu] = cpu_mask; + + /* + * Clear our mask from the other map entries in case they're + * still undefined. + */ + for (i = 0; i < NR_GIC_CPU_IF; i++) + if (i != cpu) + gic_cpu_map[i] &= ~cpu_mask; + /* * Deal with the banked PPI and SGI interrupts - disable all * PPI interrupts, ensure all SGI interrupts are enabled. @@ -646,7 +666,7 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, { irq_hw_number_t hwirq_base; struct gic_chip_data *gic; - int gic_irqs, irq_base; + int gic_irqs, irq_base, i; BUG_ON(gic_nr >= MAX_GIC_NR); @@ -682,6 +702,13 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, gic_set_base_accessor(gic, gic_get_common_base); } + /* + * Initialize the CPU interface map to all CPUs. + * It will be refined as each CPU probes its ID. + */ + for (i = 0; i < NR_GIC_CPU_IF; i++) + gic_cpu_map[i] = 0xff; + /* * For primary GICs, skip over SGIs. * For secondary GICs, skip over PPIs, too. @@ -737,7 +764,7 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) /* Convert our logical CPU mask into a physical one. */ for_each_cpu(cpu, mask) - map |= 1 << cpu_logical_map(cpu); + map |= gic_cpu_map[cpu]; /* * Ensure that stores to Normal memory are visible to the -- cgit v1.2.3 From c7cc504bc351e41e871e317ca7f032f4562f34ad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 22 Nov 2012 13:05:55 +0100 Subject: ARM: 7584/1: perf: fix link error when CONFIG_HW_PERF_EVENTS is not selected Commit e50c541 (ARM: perf: add guest vs host discrimination) broken the link as perf_instruction_pointer and perf_misc_flags are not defined when CONFIG_HW_PERF_EVENTS is not selected. As it make little sense to try and profile a guest without any HW event, just fallback to the original code when this config option is not selected. Reported-by: Russell King Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/include/asm/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 00416edecea..755877527cf 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -21,9 +21,11 @@ #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xFFFF +#ifdef CONFIG_HW_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +#endif #endif /* __ARM_PERF_EVENT_H__ */ -- cgit v1.2.3 From ce7b175656a1903605f0184bf33acebff70bfe7f Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 22 Nov 2012 18:02:54 +0100 Subject: ARM: 7585/1: kernel: fix nr_cpu_ids check in DT logical map init If a kernel is configured with a DT containing more /cpu nodes than nr_cpu_ids, the number of cpus must be capped in the DT parsing code. 
Current code carries out the check, but fails to cap the value and the check is executed after the cpu logical index is used, which can lead to memory corruption due to index overflow. This patch refactors the check against nr_cpu_ids and move it before any computed index is used in the parsing code. Signed-off-by: Lorenzo Pieralisi Acked-by: Grant Likely Reported-by: Mark Rutland Signed-off-by: Russell King --- arch/arm/kernel/devtree.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index aaf9add497f..70f1bdeb241 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -139,10 +139,14 @@ void __init arm_dt_init_cpu_maps(void) i = cpuidx++; } - tmp_map[i] = hwid; - - if (cpuidx > nr_cpu_ids) + if (WARN(cpuidx > nr_cpu_ids, "DT /cpu %u nodes greater than " + "max cores %u, capping them\n", + cpuidx, nr_cpu_ids)) { + cpuidx = nr_cpu_ids; break; + } + + tmp_map[i] = hwid; } if (WARN(!bootcpu_valid, "DT missing boot CPU MPIDR[23:0], " -- cgit v1.2.3 From 3e99675af1b25a191c467700499b1cbe5585a778 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 25 Nov 2012 03:24:32 +0100 Subject: ARM: 7582/2: rename kvm_seq to vmalloc_seq so to avoid confusion with KVM The kvm_seq value has nothing to do what so ever with this other KVM. Given that KVM support on ARM is imminent, it's best to rename kvm_seq into something else to clearly identify what it is about i.e. a sequence number for vmalloc section mappings. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/include/asm/mmu.h | 2 +- arch/arm/include/asm/mmu_context.h | 6 +++--- arch/arm/mm/context.c | 4 ++-- arch/arm/mm/ioremap.c | 16 ++++++++-------- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 5b53b53ab5c..9f77e7804f3 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -7,7 +7,7 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID u64 id; #endif - unsigned int kvm_seq; + unsigned int vmalloc_seq; } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index a64f61cb23d..e1f644bc7cc 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -20,7 +20,7 @@ #include #include -void __check_kvm_seq(struct mm_struct *mm); +void __check_vmalloc_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID @@ -34,8 +34,8 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); static inline void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) { - if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) - __check_kvm_seq(mm); + if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) + __check_vmalloc_seq(mm); if (irqs_disabled()) /* diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 7a27d7363be..bc4a5e9ebb7 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -186,8 +186,8 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) unsigned long flags; unsigned int cpu = smp_processor_id(); - if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) - __check_kvm_seq(mm); + if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) + __check_vmalloc_seq(mm); /* * Required during context switch to avoid speculative page table diff --git a/arch/arm/mm/ioremap.c 
b/arch/arm/mm/ioremap.c index 5dcc2fd46c4..88fd86cf3d9 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -47,18 +47,18 @@ int ioremap_page(unsigned long virt, unsigned long phys, } EXPORT_SYMBOL(ioremap_page); -void __check_kvm_seq(struct mm_struct *mm) +void __check_vmalloc_seq(struct mm_struct *mm) { unsigned int seq; do { - seq = init_mm.context.kvm_seq; + seq = init_mm.context.vmalloc_seq; memcpy(pgd_offset(mm, VMALLOC_START), pgd_offset_k(VMALLOC_START), sizeof(pgd_t) * (pgd_index(VMALLOC_END) - pgd_index(VMALLOC_START))); - mm->context.kvm_seq = seq; - } while (seq != init_mm.context.kvm_seq); + mm->context.vmalloc_seq = seq; + } while (seq != init_mm.context.vmalloc_seq); } #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) @@ -89,13 +89,13 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) if (!pmd_none(pmd)) { /* * Clear the PMD from the page table, and - * increment the kvm sequence so others + * increment the vmalloc sequence so others * notice this change. * * Note: this is still racy on SMP machines. */ pmd_clear(pmdp); - init_mm.context.kvm_seq++; + init_mm.context.vmalloc_seq++; /* * Free the page table, if there was one. @@ -112,8 +112,8 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) * Ensure that the active_mm is up to date - we want to * catch any use-after-iounmap cases. */ - if (current->active_mm->context.kvm_seq != init_mm.context.kvm_seq) - __check_kvm_seq(current->active_mm); + if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq) + __check_vmalloc_seq(current->active_mm); flush_tlb_kernel_range(virt, end); } -- cgit v1.2.3 From 2148b93ac86665ee70aea684bac003b3deb31dde Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 30 Nov 2012 16:31:13 +0100 Subject: ARM: 7589/1: integrator: pass the lm resource to amba This passes the lm resource to register the AMBA devices on the LM as contained within the LM resource. Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mach-integrator/impd1.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c index e428f3ab15c..5973109fb87 100644 --- a/arch/arm/mach-integrator/impd1.c +++ b/arch/arm/mach-integrator/impd1.c @@ -402,9 +402,10 @@ static int impd1_probe(struct lm_device *dev) pc_base = dev->resource.start + idev->offset; snprintf(devname, 32, "lm%x:%5.5lx", dev->id, idev->offset >> 12); - d = amba_ahb_device_add(&dev->dev, devname, pc_base, SZ_4K, - dev->irq, dev->irq, - idev->platform_data, idev->id); + d = amba_ahb_device_add_res(&dev->dev, devname, pc_base, SZ_4K, + dev->irq, dev->irq, + idev->platform_data, idev->id, + &dev->resource); if (IS_ERR(d)) { dev_err(&dev->dev, "unable to register device: %ld\n", PTR_ERR(d)); continue; -- cgit v1.2.3 From 14318efb322e2fe1a034c69463d725209eb9d548 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 29 Nov 2012 20:39:54 +0100 Subject: ARM: 7587/1: implement optimized percpu variable access Use the previously unused TPIDRPRW register to store percpu offsets. TPIDRPRW is only accessible in PL1, so it can only be used in the kernel. This replaces 2 loads with a mrc instruction for each percpu variable access. With hackbench, the performance improvement is 1.4% on Cortex-A9 (highbank). Taking an average of 30 runs of "hackbench -l 1000" yields: Before: 6.2191 After: 6.1348 Will Deacon reported similar delta on v6 with 11MPCore. 
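To make the "2 loads" claim concrete, roughly what a per-cpu offset access costs before and after this change (illustrative only, not the exact generated code):

    /* Before, with the generic asm-generic/percpu.h: */
    off = __per_cpu_offset[current_thread_info()->cpu];   /* two dependent loads */

    /* After, with the offset cached in TPIDRPRW: */
    asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off));       /* one coprocessor read */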
The asm "memory clobber" are needed here to ensure the percpu offset gets reloaded. Testing by Will found that this would not happen in __schedule() which is a bit of a special case as preemption is disabled but the execution can move cores. Signed-off-by: Rob Herring Acked-by: Will Deacon Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/include/asm/Kbuild | 1 - arch/arm/include/asm/percpu.h | 45 +++++++++++++++++++++++++++++++++++++++++++ arch/arm/kernel/setup.c | 6 ++++++ arch/arm/kernel/smp.c | 4 +++- 4 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 arch/arm/include/asm/percpu.h (limited to 'arch') diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index f70ae175a3d..2ffdaacd461 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -16,7 +16,6 @@ generic-y += local64.h generic-y += msgbuf.h generic-y += param.h generic-y += parport.h -generic-y += percpu.h generic-y += poll.h generic-y += resource.h generic-y += sections.h diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h new file mode 100644 index 00000000000..968c0a14e0a --- /dev/null +++ b/arch/arm/include/asm/percpu.h @@ -0,0 +1,45 @@ +/* + * Copyright 2012 Calxeda, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#ifndef _ASM_ARM_PERCPU_H_ +#define _ASM_ARM_PERCPU_H_ + +/* + * Same as asm-generic/percpu.h, except that we store the per cpu offset + * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7 + */ +#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6) +static inline void set_my_cpu_offset(unsigned long off) +{ + /* Set TPIDRPRW */ + asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory"); +} + +static inline unsigned long __my_cpu_offset(void) +{ + unsigned long off; + /* Read TPIDRPRW */ + asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off) : : "memory"); + return off; +} +#define __my_cpu_offset __my_cpu_offset() +#else +#define set_my_cpu_offset(x) do {} while(0) + +#endif /* CONFIG_SMP */ + +#include + +#endif /* _ASM_ARM_PERCPU_H_ */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index f739fb1d217..9a89bf4aefe 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -383,6 +383,12 @@ void cpu_init(void) BUG(); } + /* + * This only works on resume and secondary cores. For booting on the + * boot cpu, smp_prepare_boot_cpu is called after percpu area setup. 
+ */ + set_my_cpu_offset(per_cpu_offset(cpu)); + cpu_proc_init(); /* diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7eacd84cdc9..f3a2be5837a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -314,9 +314,10 @@ asmlinkage void __cpuinit secondary_start_kernel(void) current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); + cpu_init(); + printk("CPU%u: Booted secondary processor\n", cpu); - cpu_init(); preempt_disable(); trace_hardirqs_off(); @@ -372,6 +373,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } void __init smp_prepare_cpus(unsigned int max_cpus) -- cgit v1.2.3 From 026b7c6bf0bf044aa03e2affbda73b6c6a302538 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 3 Dec 2012 21:13:03 +0100 Subject: ARM: 7590/1: /proc/interrupts: limit the display of IPIs to online CPUs only This is what is done for the regular interrupts in kernel/irqs/proc.c already, before calling arch_show_interrupts(). Not doing so for the IPIs causes the column headers not to match with the content whenever some CPUs are offline. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index fbc8b2623d8..fc4d526e290 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -443,7 +443,7 @@ void show_ipi_list(struct seq_file *p, int prec) for (i = 0; i < NR_IPI; i++) { seq_printf(p, "%*s%u: ", prec - 1, "IPI", i); - for_each_present_cpu(cpu) + for_each_online_cpu(cpu) seq_printf(p, "%10u ", __get_irq_stat(cpu, ipi_irqs[i])); -- cgit v1.2.3 From 76e0920403d3de1a9ed39cffc3ec9fcb00fa4bc9 Mon Sep 17 00:00:00 2001 From: Armando Visconti Date: Tue, 4 Dec 2012 10:34:39 +0100 Subject: ARM: 7591/1: nommu: Enable the strict alignment (CR_A) bit only if ARCH < v6 This patch keeps disabled the strict alignment CP15 bit for all armv6 and armv7 processor without the mmu. This behaviour is now same as in the mmu case. Signed-off-by: Armando Visconti Signed-off-by: Russell King --- arch/arm/kernel/head-nommu.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 278cfc144f4..2c228a07e58 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -68,7 +68,7 @@ __after_proc_init: * CP15 system control register value returned in r0 from * the CPU init function. */ -#ifdef CONFIG_ALIGNMENT_TRAP +#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6 orr r0, r0, #CR_A #else bic r0, r0, #CR_A -- cgit v1.2.3 From e91b36efe51b5cbbfe5eb61a653cde5985ae8285 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Dec 2012 12:56:44 +0100 Subject: ARM: 7592/1: nommu: prevent generation of kernel unaligned memory accesses Recent ARMv7 toolchains assume that unaligned memory accesses will not fault and will instead be handled by the processor. For the nommu case (without an MPU), memory will be treated as strongly-ordered and therefore unaligned accesses may fault regardless of the SCTLR.A setting. This patch passes -mno-unaligned-access to GCC when compiling for nommu targets, preventing the generation of unaligned memory access in the kernel. 
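For illustration only (not from the patch), this is the sort of C that a recent ARMv7 toolchain may turn into a single unaligned ldr when unaligned accesses are assumed to be handled by the processor, and which therefore needs byte-by-byte code on !MMU strongly-ordered memory:

    #include <string.h>

    unsigned int read_u32(const unsigned char *p)
    {
            unsigned int v;

            /* GCC may compile this to one (possibly unaligned) word load;
             * with -mno-unaligned-access it emits byte loads instead. */
            memcpy(&v, p, sizeof(v));
            return v;
    }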
Acked-by: Nicolas Pitre Tested-by: Jonathan Austin Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index f023e3acdfb..cbe364210c7 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -34,6 +34,7 @@ KBUILD_DEFCONFIG := versatile_defconfig # defines filename extension depending memory management type. ifeq ($(CONFIG_MMU),) MMUEXT := -nommu +KBUILD_CFLAGS += $(call cc-option,-mno-unaligned-access) endif ifeq ($(CONFIG_FRAME_POINTER),y) -- cgit v1.2.3 From 39b175a0092d4a8e0875c67df82285475b1da591 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Dec 2012 12:57:11 +0100 Subject: ARM: 7593/1: nommu: do not enable DCACHE_WORD_ACCESS when !CONFIG_MMU Commit b9a50f74905a ("ARM: 7450/1: dcache: select DCACHE_WORD_ACCESS for little-endian ARMv6+ CPUs") added support for word-at-time path comparisons, relying on the ability to perform unaligned loads with negligible performance impact in hardware. For nommu configurations without MPU support, this is unpredictable and so we should fall back to the byte-by-byte routines. Acked-by: Nicolas Pitre Tested-by: Jonathan Austin Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a40e5f6abda..27ebd9b3dba 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -7,7 +7,7 @@ config ARM select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT if MMU select CPU_PM if (SUSPEND || CPU_IDLE) - select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN + select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN && MMU select GENERIC_ATOMIC64 if (CPU_V6 || !CPU_32v6K || !AEABI) select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_IRQ_PROBE -- cgit v1.2.3 From 89c2e00978ada02a5b84b361faee954cbc7a0386 Mon Sep 17 00:00:00 2001 From: Schichan Nicolas Date: Mon, 10 Dec 2012 14:49:39 +0100 Subject: ARM: 7597/1: net: bpf_jit_32: fix kzalloc gfp/size mismatch. Official prototype for kzalloc is: void *kzalloc(size_t, gfp_t); The ARM bpf_jit code was having the assumption that it was: void *kzalloc(gfp_t, size); This was resulting the use of some random GFP flags depending on the size requested and some random overflows once the really needed size was more than the value of GFP_KERNEL. This bug was present since the original inclusion of bpf_jit for ARM (ddecdfce: ARM: 7259/3: net: JIT compiler for packet filters). 
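For reference, the prototype and the argument order at issue; the corrected ctx.offsets line matches the hunk below, and the swapped form is what the pre-fix JIT effectively did:

    /* Actual prototype: */
    void *kzalloc(size_t size, gfp_t flags);

    /* Wrong (size and flags swapped, as before this patch): */
    ctx.offsets = kzalloc(GFP_KERNEL, 4 * (ctx.skf->len + 1));

    /* Right (as introduced here): */
    ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL);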
Signed-off-by: Nicolas Schichan Signed-off-by: Russell King --- arch/arm/net/bpf_jit_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index c641fb68501..a64d3496830 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -845,7 +845,7 @@ void bpf_jit_compile(struct sk_filter *fp) ctx.skf = fp; ctx.ret0_fp_idx = -1; - ctx.offsets = kzalloc(GFP_KERNEL, 4 * (ctx.skf->len + 1)); + ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL); if (ctx.offsets == NULL) return; @@ -864,7 +864,7 @@ void bpf_jit_compile(struct sk_filter *fp) ctx.idx += ctx.imm_count; if (ctx.imm_count) { - ctx.imms = kzalloc(GFP_KERNEL, 4 * ctx.imm_count); + ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL); if (ctx.imms == NULL) goto out; } -- cgit v1.2.3 From b10bca0bc699af201770989a88fa293155e9d8de Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 7 Dec 2012 17:34:37 +0100 Subject: ARM: 7595/1: syscall: rework ordering in syscall_trace_exit syscall_trace_exit is currently doing things back-to-front; invoking the audit hook *after* signalling the debugger, which presents an opportunity for the registers to be re-written by userspace in order to bypass auditing constaints. This patch fixes the ordering by moving the audit code first and the tracehook code last. On the face of it, it looks like current_thread_info()->syscall may be incorrect for the sys_exit tracepoint, but that's actually not an issue because it will have been set during syscall entry and cannot have changed since then. Reported-by: Andrew Gabbasov Tested-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 1 - arch/arm/kernel/ptrace.c | 24 +++++++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index ee81dbc6fa1..d863bbf0f1f 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -455,7 +455,6 @@ __sys_trace: __sys_trace_return: str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 - mov r1, scno mov r0, sp bl syscall_trace_exit b ret_slow_syscall diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 518536d93fb..03deeffd9f6 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -957,17 +957,23 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) return scno; } -asmlinkage int syscall_trace_exit(struct pt_regs *regs, int scno) +asmlinkage void syscall_trace_exit(struct pt_regs *regs) { - current_thread_info()->syscall = scno; - - if (test_thread_flag(TIF_SYSCALL_TRACE)) - scno = tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); + /* + * Audit the syscall before anything else, as a debugger may + * come in and change the current registers. + */ + audit_syscall_exit(regs); + /* + * Note that we haven't updated the ->syscall field for the + * current thread. This isn't a problem because it will have + * been set on syscall entry and there hasn't been an opportunity + * for a PTRACE_SET_SYSCALL since then. 
+ */ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) - trace_sys_exit(regs, scno); - - audit_syscall_exit(regs); + trace_sys_exit(regs, regs_return_value(regs)); - return scno; + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); } -- cgit v1.2.3 From fe15f3f1067c56820da44aa92659f2f908fd3caa Mon Sep 17 00:00:00 2001 From: Schichan Nicolas Date: Mon, 10 Dec 2012 14:49:40 +0100 Subject: ARM: 7598/1: net: bpf_jit_32: fix sp-relative load/stores offsets. The offset must be multiplied by 4 to be sure to access the correct 32bit word in the stack scratch space. For instance, a store at scratch memory cell #1 was generating the following: st r4, [sp, #1] While the correct code for this is: st r4, [sp, #4] To reproduce the bug (assuming your system has a NIC with the mac address 52:54:00:12:34:56): echo 0 > /proc/sys/net/core/bpf_jit_enable tcpdump -ni eth0 "ether[1] + ether[2] - ether[3] * ether[4] - ether[5] \ == -0x3AA" # this will capture packets as expected echo 1 > /proc/sys/net/core/bpf_jit_enable tcpdump -ni eth0 "ether[1] + ether[2] - ether[3] * ether[4] - ether[5] \ == -0x3AA" # this will not. This bug was present since the original inclusion of bpf_jit for ARM (ddecdfce: ARM: 7259/3: net: JIT compiler for packet filters). Signed-off-by: Nicolas Schichan Signed-off-by: Russell King --- arch/arm/net/bpf_jit_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a64d3496830..b6f305e3b90 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -42,7 +42,7 @@ #define r_skb_hl ARM_R8 #define SCRATCH_SP_OFFSET 0 -#define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + (k)) +#define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + 4 * (k)) #define SEEN_MEM ((1 << BPF_MEMWORDS) - 1) #define SEEN_MEM_WORD(k) (1 << (k)) -- cgit v1.2.3 From 1ecec696c8bb9b4cefb09495d81d081d1c81b578 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 10 Dec 2012 18:35:22 +0100 Subject: ARM: 7599/1: head: Remove boot-time HYP mode check for v5 and below The kernel can only be entered on HYP mode on CPUs which actually support it, i.e. >= ARMv7. pre-v6 platform support cannot coexist in the same kernel as support for v7 and higher, so there is no advantage in having the HYP mode check on pre-v6 hardware. At least one pre-v6 board is known to fail when the HYP mode check code is present, although the exact cause remains unknown and may be unrelated. [1] This patch restores the old behaviour for pre-v6 platforms, whereby the CPSR is forced directly to SVC mode with IRQs and FIQs masked. All kernels capable of booting on v7 hardware will retain the check, so this should not impair functionality. [1] http://lists.arm.linux.org.uk/lurker/message/20121130.013814.19218413.en.html ([ARM] head.S change broke platform device registration?) Signed-off-by: Dave Martin Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 2ef95813fce..eb87200aa4b 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -250,6 +250,7 @@ * Beware, it also clobers LR. 
*/ .macro safe_svcmode_maskall reg:req +#if __LINUX_ARM_ARCH__ >= 6 mrs \reg , cpsr mov lr , \reg and lr , lr , #MODE_MASK @@ -266,6 +267,13 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) __ERET 1: msr cpsr_c, \reg 2: +#else +/* + * workaround for possibly broken pre-v6 hardware + * (akita, Sharp Zaurus C-1000, PXA270-based) + */ + setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, \reg +#endif .endm /* -- cgit v1.2.3 From 810883f05982e7c27ea20f9ec31c6ee926d00ea1 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 6 Dec 2012 11:44:59 +0100 Subject: ARM: 7594/1: Add .smp entry for REALVIEW_EB The REALVIEW EB board can host tiles with multiple cores thus needs to be able to initialise SMP. There is, however, no .smp entry in the MACHINE_START struct for REALVIEW_EB. This patch adds the appropriate .smp entry to this struct. Signed-off-by: Steve Capper Acked-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/mach-realview/realview_eb.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c index d3b3cd216d6..28511d43637 100644 --- a/arch/arm/mach-realview/realview_eb.c +++ b/arch/arm/mach-realview/realview_eb.c @@ -467,6 +467,7 @@ static void __init realview_eb_init(void) MACHINE_START(REALVIEW_EB, "ARM-RealView EB") /* Maintainer: ARM Ltd/Deep Blue Solutions Ltd */ .atag_offset = 0x100, + .smp = smp_ops(realview_smp_ops), .fixup = realview_fixup, .map_io = realview_eb_map_io, .init_early = realview_init_early, -- cgit v1.2.3