From bb0757ebb929d5d6ba484b4313976847285ba280 Mon Sep 17 00:00:00 2001
From: David Daney
Date: Wed, 6 Jun 2012 23:00:31 +0100
Subject: MIPS: Unify memcpy.S and memcpy-inatomic.S

We can save the 451 lines of code that comprise memcpy-inatomic.S at
the expense of a single instruction in the memcpy prolog.  We also use
an additional register (t6), so this may cause increased register
pressure in some places as well.  But I think the reduced maintenance
burden of not having two nearly identical implementations makes it
worth it.

Signed-off-by: David Daney
Cc: linux-mips@linux-mips.org
Signed-off-by: Ralf Baechle
---
 arch/mips/lib/Makefile          |   2 +-
 arch/mips/lib/memcpy-inatomic.S | 451 ----------------------------------------
 arch/mips/lib/memcpy.S          |  11 +
 3 files changed, 12 insertions(+), 452 deletions(-)
 delete mode 100644 arch/mips/lib/memcpy-inatomic.S

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 2a7c74fc15f..399a50a541d 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -2,7 +2,7 @@
 # Makefile for MIPS-specific library files..
 #
 
-lib-y	+= csum_partial.o delay.o memcpy.o memcpy-inatomic.o memset.o \
+lib-y	+= csum_partial.o delay.o memcpy.o memset.o \
	   strlen_user.o strncpy_user.o strnlen_user.o uncached.o
 
 obj-y	+= iomap.o
diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S
deleted file mode 100644
index 68853a038d3..00000000000
--- a/arch/mips/lib/memcpy-inatomic.S
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Unified implementation of memcpy, memmove and the __copy_user backend.
- *
- * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
- * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
- * Copyright (C) 2002 Broadcom, Inc.
- *   memcpy/copy_user author: Mark Vandevoorde
- * Copyright (C) 2007  Maciej W. Rozycki
- *
- * Mnemonic names for arguments to memcpy/__copy_user
- */
-
-/*
- * Hack to resolve longstanding prefetch issue
- *
- * Prefetching may be fatal on some systems if we're prefetching beyond the
- * end of memory.  It's also a seriously bad idea on non-DMA-coherent
- * systems.
- */
-#ifdef CONFIG_DMA_NONCOHERENT
-#undef CONFIG_CPU_HAS_PREFETCH
-#endif
-#ifdef CONFIG_MIPS_MALTA
-#undef CONFIG_CPU_HAS_PREFETCH
-#endif
-
-#include <asm/asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/regdef.h>
-
-#define dst a0
-#define src a1
-#define len a2
-
-/*
- * Spec
- *
- * memcpy copies len bytes from src to dst and sets v0 to dst.
- * It assumes that
- *   - src and dst don't overlap
- *   - src is readable
- *   - dst is writable
- * memcpy uses the standard calling convention
- *
- * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
- * the number of uncopied bytes due to an exception caused by a read or write.
- * __copy_user assumes that src and dst don't overlap, and that the call is
- * implementing one of the following:
- *   copy_to_user
- *     - src is readable  (no exceptions when reading src)
- *   copy_from_user
- *     - dst is writable  (no exceptions when writing dst)
- * __copy_user uses a non-standard calling convention; see
- * include/asm-mips/uaccess.h
- *
- * When an exception happens on a load, the handler must
- * ensure that all of the destination buffer is overwritten to prevent
- * leaking information to user mode programs.
- */
-
-/*
- * Implementation
- */
-
-/*
- * The exception handler for loads requires that:
- *  1- AT contain the address of the byte just past the end of the source
- *     of the copy,
- *  2- src_entry <= src < AT, and
- *  3- (dst - src) == (dst_entry - src_entry),
- * The _entry suffix denotes values when __copy_user was called.
- *
- * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
- * (2) is met by incrementing src by the number of bytes copied
- * (3) is met by not doing loads between a pair of increments of dst and src
- *
- * The exception handlers for stores adjust len (if necessary) and return.
- * These handlers do not need to overwrite any data.
- *
- * For __rmemcpy and memmove an exception is always a kernel bug, therefore
- * they're not protected.
- */
-
-#define EXC(inst_reg,addr,handler)		\
-9:	inst_reg, addr;				\
-	.section __ex_table,"a";		\
-	PTR	9b, handler;			\
-	.previous
-
-/*
- * Only the 64-bit kernel can make use of 64-bit registers.
- */
-#ifdef CONFIG_64BIT
-#define USE_DOUBLE
-#endif
-
-#ifdef USE_DOUBLE
-
-#define LOAD   ld
-#define LOADL  ldl
-#define LOADR  ldr
-#define STOREL sdl
-#define STORER sdr
-#define STORE  sd
-#define ADD    daddu
-#define SUB    dsubu
-#define SRL    dsrl
-#define SRA    dsra
-#define SLL    dsll
-#define SLLV   dsllv
-#define SRLV   dsrlv
-#define NBYTES 8
-#define LOG_NBYTES 3
-
-/*
- * As we are sharing code with the mips32 tree (which uses the o32 ABI
- * register definitions), we need to redefine the register definitions from
- * the n64 ABI register naming to the o32 ABI register naming.
- */
-#undef t0
-#undef t1
-#undef t2
-#undef t3
-#define t0	$8
-#define t1	$9
-#define t2	$10
-#define t3	$11
-#define t4	$12
-#define t5	$13
-#define t6	$14
-#define t7	$15
-
-#else
-
-#define LOAD   lw
-#define LOADL  lwl
-#define LOADR  lwr
-#define STOREL swl
-#define STORER swr
-#define STORE  sw
-#define ADD    addu
-#define SUB    subu
-#define SRL    srl
-#define SLL    sll
-#define SRA    sra
-#define SLLV   sllv
-#define SRLV   srlv
-#define NBYTES 4
-#define LOG_NBYTES 2
-
-#endif /* USE_DOUBLE */
-
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-#define LDFIRST LOADR
-#define LDREST  LOADL
-#define STFIRST STORER
-#define STREST  STOREL
-#define SHIFT_DISCARD SLLV
-#else
-#define LDFIRST LOADL
-#define LDREST  LOADR
-#define STFIRST STOREL
-#define STREST  STORER
-#define SHIFT_DISCARD SRLV
-#endif
-
-#define FIRST(unit) ((unit)*NBYTES)
-#define REST(unit)  (FIRST(unit)+NBYTES-1)
-#define UNIT(unit)  FIRST(unit)
-
-#define ADDRMASK (NBYTES-1)
-
-	.text
-	.set	noreorder
-#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
-	.set	noat
-#else
-	.set	at=v1
-#endif
-
-/*
- * A combined memcpy/__copy_user
- * __copy_user sets len to 0 for success; else to an upper bound of
- * the number of uncopied bytes.
- * memcpy sets v0 to dst.
- */
-	.align	5
-LEAF(__copy_user_inatomic)
-	/*
-	 * Note: dst & src may be unaligned, len may be 0
-	 * Temps
-	 */
-#define rem t8
-
-	/*
-	 * The "issue break"s below are very approximate.
-	 * Issue delays for dcache fills will perturb the schedule, as will
-	 * load queue full replay traps, etc.
-	 *
-	 * If len < NBYTES use byte operations.
-	 */
-	PREF(	0, 0(src) )
-	PREF(	1, 0(dst) )
-	sltu	t2, len, NBYTES
-	and	t1, dst, ADDRMASK
-	PREF(	0, 1*32(src) )
-	PREF(	1, 1*32(dst) )
-	bnez	t2, .Lcopy_bytes_checklen
-	 and	t0, src, ADDRMASK
-	PREF(	0, 2*32(src) )
-	PREF(	1, 2*32(dst) )
-	bnez	t1, .Ldst_unaligned
-	 nop
-	bnez	t0, .Lsrc_unaligned_dst_aligned
-	/*
-	 * use delay slot for fall-through
-	 * src and dst are aligned; need to compute rem
-	 */
-.Lboth_aligned:
-	 SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
-	beqz	t0, .Lcleanup_both_aligned	# len < 8*NBYTES
-	 and	rem, len, (8*NBYTES-1)		# rem = len % (8*NBYTES)
-	PREF(	0, 3*32(src) )
-	PREF(	1, 3*32(dst) )
-	.align	4
-1:
-EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
-	SUB	len, len, 8*NBYTES
-EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy)
-EXC(	LOAD	t7, UNIT(5)(src),	.Ll_exc_copy)
-	STORE	t0, UNIT(0)(dst)
-	STORE	t1, UNIT(1)(dst)
-EXC(	LOAD	t0, UNIT(6)(src),	.Ll_exc_copy)
-EXC(	LOAD	t1, UNIT(7)(src),	.Ll_exc_copy)
-	ADD	src, src, 8*NBYTES
-	ADD	dst, dst, 8*NBYTES
-	STORE	t2, UNIT(-6)(dst)
-	STORE	t3, UNIT(-5)(dst)
-	STORE	t4, UNIT(-4)(dst)
-	STORE	t7, UNIT(-3)(dst)
-	STORE	t0, UNIT(-2)(dst)
-	STORE	t1, UNIT(-1)(dst)
-	PREF(	0, 8*32(src) )
-	PREF(	1, 8*32(dst) )
-	bne	len, rem, 1b
-	 nop
-
-	/*
-	 * len == rem == the number of bytes left to copy < 8*NBYTES
-	 */
-.Lcleanup_both_aligned:
-	beqz	len, .Ldone
-	 sltu	t0, len, 4*NBYTES
-	bnez	t0, .Lless_than_4units
-	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
-	/*
-	 * len >= 4*NBYTES
-	 */
-EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
-	SUB	len, len, 4*NBYTES
-	ADD	src, src, 4*NBYTES
-	STORE	t0, UNIT(0)(dst)
-	STORE	t1, UNIT(1)(dst)
-	STORE	t2, UNIT(2)(dst)
-	STORE	t3, UNIT(3)(dst)
-	.set	reorder				/* DADDI_WAR */
-	ADD	dst, dst, 4*NBYTES
-	beqz	len, .Ldone
-	.set	noreorder
-.Lless_than_4units:
-	/*
-	 * rem = len % NBYTES
-	 */
-	beq	rem, len, .Lcopy_bytes
-	 nop
-1:
-EXC(	LOAD	t0, 0(src),		.Ll_exc)
-	ADD	src, src, NBYTES
-	SUB	len, len, NBYTES
-	STORE	t0, 0(dst)
-	.set	reorder				/* DADDI_WAR */
-	ADD	dst, dst, NBYTES
-	bne	rem, len, 1b
-	.set	noreorder
-
-	/*
-	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
-	 * A loop would do only a byte at a time with possible branch
-	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
-	 * because can't assume read-access to dst.  Instead, use
-	 * STREST dst, which doesn't require read access to dst.
-	 *
-	 * This code should perform better than a simple loop on modern,
-	 * wide-issue mips processors because the code has fewer branches and
-	 * more instruction-level parallelism.
-	 */
-#define bits t2
-	beqz	len, .Ldone
-	 ADD	t1, dst, len	# t1 is just past last byte of dst
-	li	bits, 8*NBYTES
-	SLL	rem, len, 3	# rem = number of bits to keep
-EXC(	LOAD	t0, 0(src),		.Ll_exc)
-	SUB	bits, bits, rem	# bits = number of bits to discard
-	SHIFT_DISCARD t0, t0, bits
-	STREST	t0, -1(t1)
-	jr	ra
-	 move	len, zero
-.Ldst_unaligned:
-	/*
-	 * dst is unaligned
-	 * t0 = src & ADDRMASK
-	 * t1 = dst & ADDRMASK; t1 > 0
-	 * len >= NBYTES
-	 *
-	 * Copy enough bytes to align dst
-	 * Set match = (src and dst have same alignment)
-	 */
-#define match rem
-EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc)
-	ADD	t2, zero, NBYTES
-EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy)
-	SUB	t2, t2, t1	# t2 = number of bytes copied
-	xor	match, t0, t1
-	STFIRST t3, FIRST(0)(dst)
-	beq	len, t2, .Ldone
-	 SUB	len, len, t2
-	ADD	dst, dst, t2
-	beqz	match, .Lboth_aligned
-	 ADD	src, src, t2
-
-.Lsrc_unaligned_dst_aligned:
-	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
-	PREF(	0, 3*32(src) )
-	beqz	t0, .Lcleanup_src_unaligned
-	 and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
-	PREF(	1, 3*32(dst) )
-1:
-/*
- * Avoid consecutive LD*'s to the same register since some mips
- * implementations can't issue them in the same cycle.
- * It's OK to load FIRST(N+1) before REST(N) because the two addresses
- * are to the same unit (unless src is aligned, but it's not).
- */
-EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
-EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy)
-	SUB	len, len, 4*NBYTES
-EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
-EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy)
-EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy)
-EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy)
-EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy)
-EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy)
-	PREF(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
-	ADD	src, src, 4*NBYTES
-#ifdef CONFIG_CPU_SB1
-	nop				# improves slotting
-#endif
-	STORE	t0, UNIT(0)(dst)
-	STORE	t1, UNIT(1)(dst)
-	STORE	t2, UNIT(2)(dst)
-	STORE	t3, UNIT(3)(dst)
-	PREF(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
-	.set	reorder				/* DADDI_WAR */
-	ADD	dst, dst, 4*NBYTES
-	bne	len, rem, 1b
-	.set	noreorder
-
-.Lcleanup_src_unaligned:
-	beqz	len, .Ldone
-	 and	rem, len, NBYTES-1	# rem = len % NBYTES
-	beq	rem, len, .Lcopy_bytes
-	 nop
-1:
-EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
-EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
-	ADD	src, src, NBYTES
-	SUB	len, len, NBYTES
-	STORE	t0, 0(dst)
-	.set	reorder				/* DADDI_WAR */
-	ADD	dst, dst, NBYTES
-	bne	len, rem, 1b
-	.set	noreorder
-
-.Lcopy_bytes_checklen:
-	beqz	len, .Ldone
-	 nop
-.Lcopy_bytes:
-	/* 0 < len < NBYTES */
-#define COPY_BYTE(N)			\
-EXC(	lb	t0, N(src), .Ll_exc);	\
-	SUB	len, len, 1;		\
-	beqz	len, .Ldone;		\
-	 sb	t0, N(dst)
-
-	COPY_BYTE(0)
-	COPY_BYTE(1)
-#ifdef USE_DOUBLE
-	COPY_BYTE(2)
-	COPY_BYTE(3)
-	COPY_BYTE(4)
-	COPY_BYTE(5)
-#endif
-EXC(	lb	t0, NBYTES-2(src), .Ll_exc)
-	SUB	len, len, 1
-	jr	ra
-	 sb	t0, NBYTES-2(dst)
-.Ldone:
-	jr	ra
-	 nop
-	END(__copy_user_inatomic)
-
-.Ll_exc_copy:
-	/*
-	 * Copy bytes from src until faulting load address (or until a
-	 * lb faults)
-	 *
-	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
-	 * may be more than a byte beyond the last address.
-	 * Hence, the lb below may get an exception.
-	 *
-	 * Assumes src < THREAD_BUADDR($28)
-	 */
-	LOAD	t0, TI_TASK($28)
-	 nop
-	LOAD	t0, THREAD_BUADDR(t0)
-1:
-EXC(	lb	t1, 0(src),	.Ll_exc)
-	ADD	src, src, 1
-	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
-	.set	reorder				/* DADDI_WAR */
-	ADD	dst, dst, 1
-	bne	src, t0, 1b
-	.set	noreorder
-.Ll_exc:
-	LOAD	t0, TI_TASK($28)
-	 nop
-	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
-	 nop
-	SUB	len, AT, t0		# len number of uncopied bytes
-	jr	ra
-	 nop
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 56a1f85a1ce..65192c06781 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -182,6 +182,14 @@
 	.set	at=v1
 #endif
 
+/*
+ * t6 is used as a flag to note inatomic mode.
+ */
+LEAF(__copy_user_inatomic)
+	b	__copy_user_common
+	 li	t6, 1
+	END(__copy_user_inatomic)
+
 /*
  * A combined memcpy/__copy_user
  * __copy_user sets len to 0 for success; else to an upper bound of
@@ -193,6 +201,8 @@ LEAF(memcpy)					/* a0=dst a1=src a2=len */
 	move	v0, dst				/* return value */
 .L__memcpy:
 FEXPORT(__copy_user)
+	li	t6, 0	/* not inatomic */
+__copy_user_common:
 	/*
 	 * Note: dst & src may be unaligned, len may be 0
 	 * Temps
@@ -458,6 +468,7 @@ EXC(	lb	t1, 0(src),	.Ll_exc)
 	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
 	 nop
 	SUB	len, AT, t0		# len number of uncopied bytes
+	bnez	t6, .Ldone	/* Skip the zeroing part if inatomic */
 	/*
 	 * Here's where we rely on src and dst being incremented in tandem,
	 * See (3) above.
-- cgit v1.2.3
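[Note: the behaviour the new t6 flag selects between can be modelled in a few
lines of C.  The sketch below is an illustration only, not kernel code; the
helper name and the 'copied' parameter are hypothetical, standing in for the
state the exception fixup sees.  It shows the one behavioural difference
between the two entry points: after a faulting load, __copy_user zero-fills
the uncopied tail of the destination so no stale kernel data leaks to user
space, while __copy_user_inatomic branches straight to .Ldone and skips the
zeroing.

    #include <stddef.h>
    #include <string.h>

    /*
     * Model of the unified fixup path.  'copied' is how many bytes the
     * asm loop moved before the load faulted; 'inatomic' mirrors t6.
     */
    static size_t copy_user_model(void *dst, const void *src_ok, size_t len,
                                  size_t copied, int inatomic)
    {
            memcpy(dst, src_ok, copied);    /* the part that succeeded */
            if (!inatomic)                  /* t6 == 0: full __copy_user */
                    memset((char *)dst + copied, 0, len - copied);
            return len - copied;            /* both report uncopied bytes */
    }
]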
From 3165c846a2aa7ce25d5f1db6be9960ebeeb9bb0b Mon Sep 17 00:00:00 2001
From: Florian Fainelli
Date: Tue, 31 Jan 2012 18:18:43 +0100
Subject: MIPS: introduce CPU_GENERIC_DUMP_TLB

Allows us not to duplicate more lines in arch/mips/lib/Makefile.

Signed-off-by: Florian Fainelli
Patchwork: http://patchwork.linux-mips.org/patch/3329/
Signed-off-by: John Crispin
---
 arch/mips/lib/Makefile | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 399a50a541d..c4a82e841c7 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -8,28 +8,9 @@ lib-y += csum_partial.o delay.o memcpy.o memset.o \
 obj-y	+= iomap.o
 obj-$(CONFIG_PCI) += iomap-pci.o
 
-obj-$(CONFIG_CPU_LOONGSON2)	+= dump_tlb.o
-obj-$(CONFIG_CPU_MIPS32)	+= dump_tlb.o
-obj-$(CONFIG_CPU_MIPS64)	+= dump_tlb.o
-obj-$(CONFIG_CPU_NEVADA)	+= dump_tlb.o
-obj-$(CONFIG_CPU_R10000)	+= dump_tlb.o
+obj-$(CONFIG_CPU_GENERIC_DUMP_TLB) += dump_tlb.o
 obj-$(CONFIG_CPU_R3000)		+= r3k_dump_tlb.o
-obj-$(CONFIG_CPU_R4300)		+= dump_tlb.o
-obj-$(CONFIG_CPU_R4X00)		+= dump_tlb.o
-obj-$(CONFIG_CPU_R5000)		+= dump_tlb.o
-obj-$(CONFIG_CPU_R5432)		+= dump_tlb.o
-obj-$(CONFIG_CPU_R5500)		+= dump_tlb.o
-obj-$(CONFIG_CPU_R6000)		+=
-obj-$(CONFIG_CPU_R8000)		+=
-obj-$(CONFIG_CPU_RM7000)	+= dump_tlb.o
-obj-$(CONFIG_CPU_RM9000)	+= dump_tlb.o
-obj-$(CONFIG_CPU_SB1)		+= dump_tlb.o
 obj-$(CONFIG_CPU_TX39XX)	+= r3k_dump_tlb.o
-obj-$(CONFIG_CPU_TX49XX)	+= dump_tlb.o
-obj-$(CONFIG_CPU_VR41XX)	+= dump_tlb.o
-obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= dump_tlb.o
-obj-$(CONFIG_CPU_XLR)		+= dump_tlb.o
-obj-$(CONFIG_CPU_XLP)		+= dump_tlb.o
 
 # libgcc-style stuff needed in the kernel
 obj-y += ashldi3.o ashrdi3.o cmpdi2.o lshrdi3.o ucmpdi2.o
-- cgit v1.2.3
From 5210edcd527773c227465ad18e416a894966324f Mon Sep 17 00:00:00 2001
From: David Daney
Date: Fri, 28 Sep 2012 11:34:10 -0700
Subject: MIPS: Make __{,n,u}delay declarations match definitions and generic delay.h

At some recent point arch/mips/include/asm/delay.h has started being
included into csrc-octeon.c where the __?delay() functions are defined.
This causes a compile failure due to conflicting declarations and
definitions of the functions.

It turns out that the generic definitions in arch/mips/lib/delay.c also
conflict.

Proposed fix: Declare the functions to take unsigned long parameters
just like asm-generic (and x86) does.  Update __delay to agree (__ndelay
and __udelay need no change).

Bonus: Get rid of 'inline' from the __delay() definition, as it is
globally visible, and the compiler should be making this decision itself
(it does in fact inline the function without being told to).

Signed-off-by: David Daney
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/4354/
Signed-off-by: Ralf Baechle
---
 arch/mips/lib/delay.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c
index 5995969e8c4..dc81ca8dc0d 100644
--- a/arch/mips/lib/delay.c
+++ b/arch/mips/lib/delay.c
@@ -15,13 +15,17 @@
 #include <linux/smp.h>
 #include <asm/compiler.h>
 #include <asm/war.h>
 
-inline void __delay(unsigned int loops)
+void __delay(unsigned long loops)
 {
 	__asm__ __volatile__ (
 	"	.set	noreorder				\n"
 	"	.align	3					\n"
 	"1:	bnez	%0, 1b					\n"
+#if __SIZEOF_LONG__ == 4
 	"	subu	%0, 1					\n"
+#else
+	"	dsubu	%0, 1					\n"
+#endif
 	"	.set	reorder					\n"
 	: "=r" (loops)
 	: "0" (loops));
-- cgit v1.2.3
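[Note: the incompatibility being fixed is a plain C rule: a declaration and a
definition of the same external function must have identical parameter types.
A standalone sketch of the rule (not the kernel's headers; the loop body
stands in for the bnez/subu asm):

    /* asm-generic style declaration, which MIPS now matches: */
    extern void __delay(unsigned long loops);

    /*
     * The definition must use the identical type.  With the old
     * "unsigned int loops" definition, including the header into the
     * translation unit that defines the function makes gcc reject it
     * with a "conflicting types for '__delay'" error.
     */
    void __delay(unsigned long loops)
    {
            while (loops--)
                    ;       /* stands in for the bnez/subu delay loop */
    }
]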
From 01422ff49135e0b747132686405ea8a58f760997 Mon Sep 17 00:00:00 2001
From: Ralf Baechle
Date: Wed, 17 Oct 2012 01:01:20 +0200
Subject: MIPS: Restore pagemask after dumping the TLB.

Or bad things might happen if the last TLB entry isn't a basic size page.

Signed-off-by: Ralf Baechle
---
 arch/mips/lib/dump_tlb.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/dump_tlb.c b/arch/mips/lib/dump_tlb.c
index 3f69725556a..a99c1d3fc56 100644
--- a/arch/mips/lib/dump_tlb.c
+++ b/arch/mips/lib/dump_tlb.c
@@ -50,8 +50,9 @@ static void dump_tlb(int first, int last)
 {
 	unsigned long s_entryhi, entryhi, asid;
 	unsigned long long entrylo0, entrylo1;
-	unsigned int s_index, pagemask, c0, c1, i;
+	unsigned int s_index, s_pagemask, pagemask, c0, c1, i;
 
+	s_pagemask = read_c0_pagemask();
 	s_entryhi = read_c0_entryhi();
 	s_index = read_c0_index();
 	asid = s_entryhi & 0xff;
@@ -103,6 +104,7 @@ static void dump_tlb(int first, int last)
 
 	write_c0_entryhi(s_entryhi);
 	write_c0_index(s_index);
+	write_c0_pagemask(s_pagemask);
 }
 
 void dump_tlb_all(void)
-- cgit v1.2.3
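[Note: reading a TLB entry loads EntryHi, EntryLo0/1 and PageMask from the
selected entry as a side effect, so every CP0 register the dump loop clobbers
has to be saved on entry and restored on exit; this patch adds PageMask to
the saved set.  A schematic of the pattern (kernel context assumed, real
read_c0_*/write_c0_* accessors from <asm/mipsregs.h>, hypothetical function
name, probe body elided):

    #include <asm/mipsregs.h>

    static void probe_tlb_sketch(int first, int last)
    {
            /* save everything the probe loop will clobber */
            unsigned long s_entryhi  = read_c0_entryhi();
            unsigned int  s_index    = read_c0_index();
            unsigned int  s_pagemask = read_c0_pagemask();  /* the fix */

            /* ... write_c0_index()/tlb_read() for each entry ... */

            /* restore, so later TLB writes don't use a stale PageMask */
            write_c0_entryhi(s_entryhi);
            write_c0_index(s_index);
            write_c0_pagemask(s_pagemask);
    }
]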
From 92d11594f688c8b55b51e80f2eac4417396237a4 Mon Sep 17 00:00:00 2001
From: Jim Quinlan
Date: Thu, 6 Sep 2012 11:36:55 -0400
Subject: MIPS: Remove irqflags.h dependency from bitops.h

The "else clause" of most functions in bitops.h invoked
raw_local_irq_{save,restore}() and in doing so had a dependency on
irqflags.h.  This fix moves said code to bitops.c, removing the
dependency.

Signed-off-by: Jim Quinlan
Cc: linux-mips@linux-mips.org
Cc: David Daney
Cc: Kevin Cernekee cernekee@gmail.com
Cc: Jim Quinlan
Patchwork: https://patchwork.linux-mips.org/patch/4320/
Signed-off-by: Ralf Baechle
---
 arch/mips/lib/Makefile |   2 +-
 arch/mips/lib/bitops.c | 179 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 arch/mips/lib/bitops.c

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index c4a82e841c7..a7b89371435 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -2,7 +2,7 @@
 # Makefile for MIPS-specific library files..
 #
 
-lib-y	+= csum_partial.o delay.o memcpy.o memset.o \
+lib-y	+= bitops.o csum_partial.o delay.o memcpy.o memset.o \
	   strlen_user.o strncpy_user.o strnlen_user.o uncached.o
 
 obj-y	+= iomap.o
diff --git a/arch/mips/lib/bitops.c b/arch/mips/lib/bitops.c
new file mode 100644
index 00000000000..239a9c957b0
--- /dev/null
+++ b/arch/mips/lib/bitops.c
@@ -0,0 +1,179 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1994-1997, 99, 2000, 06, 07  Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (c) 1999, 2000  Silicon Graphics, Inc.
+ */
+#include <linux/bitops.h>
+#include <linux/irqflags.h>
+#include <linux/export.h>
+
+
+/**
+ * __mips_set_bit - Atomically set a bit in memory.  This is called by
+ * set_bit() if it cannot find a faster solution.
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ */
+void __mips_set_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *a = addr;
+	unsigned bit = nr & SZLONG_MASK;
+	unsigned long mask;
+	unsigned long flags;
+
+	a += nr >> SZLONG_LOG;
+	mask = 1UL << bit;
+	raw_local_irq_save(flags);
+	*a |= mask;
+	raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__mips_set_bit);
+
+
+/**
+ * __mips_clear_bit - Clears a bit in memory.  This is called by clear_bit()
+ * if it cannot find a faster solution.
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ */
+void __mips_clear_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *a = addr;
+	unsigned bit = nr & SZLONG_MASK;
+	unsigned long mask;
+	unsigned long flags;
+
+	a += nr >> SZLONG_LOG;
+	mask = 1UL << bit;
+	raw_local_irq_save(flags);
+	*a &= ~mask;
+	raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__mips_clear_bit);
+
+
+/**
+ * __mips_change_bit - Toggle a bit in memory.  This is called by change_bit()
+ * if it cannot find a faster solution.
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ */
+void __mips_change_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *a = addr;
+	unsigned bit = nr & SZLONG_MASK;
+	unsigned long mask;
+	unsigned long flags;
+
+	a += nr >> SZLONG_LOG;
+	mask = 1UL << bit;
+	raw_local_irq_save(flags);
+	*a ^= mask;
+	raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL(__mips_change_bit);
+
+
+/**
+ * __mips_test_and_set_bit - Set a bit and return its old value.  This is
+ * called by test_and_set_bit() if it cannot find a faster solution.
+ * @nr: Bit to set
+ * @addr: Address to count from
+ */
+int __mips_test_and_set_bit(unsigned long nr,
+			    volatile unsigned long *addr)
+{
+	volatile unsigned long *a = addr;
+	unsigned bit = nr & SZLONG_MASK;
+	unsigned long mask;
+	unsigned long flags;
+	unsigned long res;
+
+	a += nr >> SZLONG_LOG;
+	mask = 1UL << bit;
+	raw_local_irq_save(flags);
+	res = (mask & *a);
+	*a |= mask;
+	raw_local_irq_restore(flags);
+	return res;
+}
+EXPORT_SYMBOL(__mips_test_and_set_bit);
+
+
+/**
+ * __mips_test_and_set_bit_lock - Set a bit and return its old value.  This is
+ * called by test_and_set_bit_lock() if it cannot find a faster solution.
+ * @nr: Bit to set
+ * @addr: Address to count from
+ */
+int __mips_test_and_set_bit_lock(unsigned long nr,
+				 volatile unsigned long *addr)
+{
+	volatile unsigned long *a = addr;
+	unsigned bit = nr & SZLONG_MASK;
+	unsigned long mask;
+	unsigned long flags;
+	unsigned long res;
+
+	a += nr >> SZLONG_LOG;
+	mask = 1UL << bit;
+	raw_local_irq_save(flags);
+	res = (mask & *a);
+	*a |= mask;
+	raw_local_irq_restore(flags);
+	return res;
+}
+EXPORT_SYMBOL(__mips_test_and_set_bit_lock);
+
+
+/**
+ * __mips_test_and_clear_bit - Clear a bit and return its old value.  This is
+ * called by test_and_clear_bit() if it cannot find a faster solution.
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ */
+int __mips_test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *a = addr;
+	unsigned bit = nr & SZLONG_MASK;
+	unsigned long mask;
+	unsigned long flags;
+	unsigned long res;
+
+	a += nr >> SZLONG_LOG;
+	mask = 1UL << bit;
+	raw_local_irq_save(flags);
+	res = (mask & *a);
+	*a &= ~mask;
+	raw_local_irq_restore(flags);
+	return res;
+}
+EXPORT_SYMBOL(__mips_test_and_clear_bit);
+
+
+/**
+ * __mips_test_and_change_bit - Change a bit and return its old value.  This is
+ * called by test_and_change_bit() if it cannot find a faster solution.
+ * @nr: Bit to change
+ * @addr: Address to count from
+ */
+int __mips_test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	volatile unsigned long *a = addr;
+	unsigned bit = nr & SZLONG_MASK;
+	unsigned long mask;
+	unsigned long flags;
+	unsigned long res;
+
+	a += nr >> SZLONG_LOG;
+	mask = 1UL << bit;
+	raw_local_irq_save(flags);
+	res = (mask & *a);
+	*a ^= mask;
+	raw_local_irq_restore(flags);
+	return res;
+}
+EXPORT_SYMBOL(__mips_test_and_change_bit);
-- cgit v1.2.3
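[Note: these helpers are the slow path behind the bit operations in
arch/mips/include/asm/bitops.h, which pick an inline ll/sc sequence when the
CPU has one and otherwise fall back to the irq-masking C versions above.  A
condensed sketch of that dispatch (shape only, hypothetical function name,
not the verbatim header):

    /* roughly what asm/bitops.h does for set_bit() */
    static inline void set_bit_sketch(unsigned long nr,
                                      volatile unsigned long *addr)
    {
            if (kernel_uses_llsc) {
                    /* inline asm: 1: ll t0,(a); or t0,t0,mask;
                     * sc t0,(a); beqz t0,1b
                     * atomic without masking interrupts */
            } else {
                    /* no ll/sc: out-of-line, irq-masked C fallback */
                    __mips_set_bit(nr, addr);
            }
    }
]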
+ */ +__asm__( + " .macro arch_local_irq_disable\n" + " .set push \n" + " .set noat \n" +#ifdef CONFIG_MIPS_MT_SMTC + " mfc0 $1, $2, 1 \n" + " ori $1, 0x400 \n" + " .set noreorder \n" + " mtc0 $1, $2, 1 \n" +#elif defined(CONFIG_CPU_MIPSR2) + /* see irqflags.h for inline function */ +#else + " mfc0 $1,$12 \n" + " ori $1,0x1f \n" + " xori $1,0x1f \n" + " .set noreorder \n" + " mtc0 $1,$12 \n" +#endif + " irq_disable_hazard \n" + " .set pop \n" + " .endm \n"); + +void arch_local_irq_disable(void) +{ + preempt_disable(); + __asm__ __volatile__( + "arch_local_irq_disable" + : /* no outputs */ + : /* no inputs */ + : "memory"); + preempt_enable(); +} +EXPORT_SYMBOL(arch_local_irq_disable); + + +__asm__( + " .macro arch_local_irq_save result \n" + " .set push \n" + " .set reorder \n" + " .set noat \n" +#ifdef CONFIG_MIPS_MT_SMTC + " mfc0 \\result, $2, 1 \n" + " ori $1, \\result, 0x400 \n" + " .set noreorder \n" + " mtc0 $1, $2, 1 \n" + " andi \\result, \\result, 0x400 \n" +#elif defined(CONFIG_CPU_MIPSR2) + /* see irqflags.h for inline function */ +#else + " mfc0 \\result, $12 \n" + " ori $1, \\result, 0x1f \n" + " xori $1, 0x1f \n" + " .set noreorder \n" + " mtc0 $1, $12 \n" +#endif + " irq_disable_hazard \n" + " .set pop \n" + " .endm \n"); + +unsigned long arch_local_irq_save(void) +{ + unsigned long flags; + preempt_disable(); + asm volatile("arch_local_irq_save\t%0" + : "=r" (flags) + : /* no inputs */ + : "memory"); + preempt_enable(); + return flags; +} +EXPORT_SYMBOL(arch_local_irq_save); + + +__asm__( + " .macro arch_local_irq_restore flags \n" + " .set push \n" + " .set noreorder \n" + " .set noat \n" +#ifdef CONFIG_MIPS_MT_SMTC + "mfc0 $1, $2, 1 \n" + "andi \\flags, 0x400 \n" + "ori $1, 0x400 \n" + "xori $1, 0x400 \n" + "or \\flags, $1 \n" + "mtc0 \\flags, $2, 1 \n" +#elif defined(CONFIG_CPU_MIPSR2) && defined(CONFIG_IRQ_CPU) + /* see irqflags.h for inline function */ +#elif defined(CONFIG_CPU_MIPSR2) + /* see irqflags.h for inline function */ +#else + " mfc0 $1, $12 \n" + " andi \\flags, 1 \n" + " ori $1, 0x1f \n" + " xori $1, 0x1f \n" + " or \\flags, $1 \n" + " mtc0 \\flags, $12 \n" +#endif + " irq_disable_hazard \n" + " .set pop \n" + " .endm \n"); + +void arch_local_irq_restore(unsigned long flags) +{ + unsigned long __tmp1; + +#ifdef CONFIG_MIPS_MT_SMTC + /* + * SMTC kernel needs to do a software replay of queued + * IPIs, at the cost of branch and call overhead on each + * local_irq_restore() + */ + if (unlikely(!(flags & 0x0400))) + smtc_ipi_replay(); +#endif + preempt_disable(); + __asm__ __volatile__( + "arch_local_irq_restore\t%0" + : "=r" (__tmp1) + : "0" (flags) + : "memory"); + preempt_enable(); +} +EXPORT_SYMBOL(arch_local_irq_restore); + + +void __arch_local_irq_restore(unsigned long flags) +{ + unsigned long __tmp1; + + preempt_disable(); + __asm__ __volatile__( + "arch_local_irq_restore\t%0" + : "=r" (__tmp1) + : "0" (flags) + : "memory"); + preempt_enable(); +} +EXPORT_SYMBOL(__arch_local_irq_restore); + +#endif /* !defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_MIPS_MT_SMTC) */ -- cgit v1.2.3 From f93a1a00f2bd550f86fd1a9f83c493755aecd15f Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Thu, 15 Nov 2012 18:16:14 -0500 Subject: MIPS: Fix crash that occurs when function tracing is enabled A recent patch changed some irq routines from inlines to functions. These routines are called by the tracer code. Now that they're functions, if they are compiled for function tracing they will call the tracer and crash the system due to infinite recursion. 
From f93a1a00f2bd550f86fd1a9f83c493755aecd15f Mon Sep 17 00:00:00 2001
From: Al Cooper
Date: Thu, 15 Nov 2012 18:16:14 -0500
Subject: MIPS: Fix crash that occurs when function tracing is enabled

A recent patch changed some irq routines from inlines to functions.
These routines are called by the tracer code.  Now that they're
functions, if they are compiled for function tracing they will call
the tracer and crash the system due to infinite recursion.

The fix disables tracing in these functions by using "notrace" in the
function definition.

Signed-off-by: Al Cooper
Reviewed-by: David Daney
Patchwork: https://patchwork.linux-mips.org/patch/4564/
Signed-off-by: Ralf Baechle
---
 arch/mips/lib/mips-atomic.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c
index e091430dbeb..cd160be3ce4 100644
--- a/arch/mips/lib/mips-atomic.c
+++ b/arch/mips/lib/mips-atomic.c
@@ -56,7 +56,7 @@ __asm__(
 	"	.set	pop						\n"
 	"	.endm							\n");
 
-void arch_local_irq_disable(void)
+notrace void arch_local_irq_disable(void)
 {
 	preempt_disable();
 	__asm__ __volatile__(
@@ -93,7 +93,7 @@ __asm__(
 	"	.set	pop						\n"
 	"	.endm							\n");
 
-unsigned long arch_local_irq_save(void)
+notrace unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags;
 	preempt_disable();
@@ -135,7 +135,7 @@ __asm__(
 	"	.set	pop						\n"
 	"	.endm							\n");
 
-void arch_local_irq_restore(unsigned long flags)
+notrace void arch_local_irq_restore(unsigned long flags)
 {
 	unsigned long __tmp1;
 
@@ -159,7 +159,7 @@ void arch_local_irq_restore(unsigned long flags)
 EXPORT_SYMBOL(arch_local_irq_restore);
 
 
-void __arch_local_irq_restore(unsigned long flags)
+notrace void __arch_local_irq_restore(unsigned long flags)
 {
 	unsigned long __tmp1;
 
-- cgit v1.2.3

From 4ea494b528ac1b9df9f0c77ba49e3e8ee108d9ec Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven
Date: Thu, 6 Dec 2012 18:12:17 +0000
Subject: MIPS: delay.c: Check BITS_PER_LONG instead of __SIZEOF_LONG__

When building a 32-bit kernel for RBTX4927 with gcc version 4.1.2
20061115 (prerelease) (Ubuntu 4.1.1-21), I get:

arch/mips/lib/delay.c:24:5: warning: "__SIZEOF_LONG__" is not defined

As a consequence, __delay() always uses the 64-bit "dsubu" instruction.

Replace the check for "__SIZEOF_LONG__ == 4" by "BITS_PER_LONG == 32" to
fix this.

Introduced by commit 5210edcd527773c227465ad18e416a894966324f ["MIPS:
Make __{,n,u}delay declarations match definitions and generic delay.h"]

Signed-off-by: Geert Uytterhoeven
Patchwork: https://patchwork.linux-mips.org/patch/4678/
Acked-by: David Daney
Signed-off-by: Ralf Baechle
---
 arch/mips/lib/delay.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c
index dc81ca8dc0d..288f7954988 100644
--- a/arch/mips/lib/delay.c
+++ b/arch/mips/lib/delay.c
@@ -21,7 +21,7 @@ void __delay(unsigned long loops)
 	"	.set	noreorder				\n"
 	"	.align	3					\n"
 	"1:	bnez	%0, 1b					\n"
-#if __SIZEOF_LONG__ == 4
+#if BITS_PER_LONG == 32
 	"	subu	%0, 1					\n"
 #else
 	"	dsubu	%0, 1					\n"
-- cgit v1.2.3
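[Note: the failure mode generalizes.  __SIZEOF_LONG__ is a compiler
predefine that older gcc lacks (it appeared around gcc 4.3, if memory
serves, and the 4.1.2 above predates it), and in #if arithmetic an
undefined identifier quietly evaluates to 0, so "__SIZEOF_LONG__ == 4" is
false even on a 32-bit build.  BITS_PER_LONG comes from kernel headers and
is always defined.  A standalone C illustration of the silent misselection:

    #include <stdio.h>

    /* Pretend this is gcc 4.1: __SIZEOF_LONG__ is not defined, an
     * undefined identifier in #if evaluates to 0, 0 == 4 is false, and
     * the 64-bit branch is chosen even though long is 32 bits. */
    #if __SIZEOF_LONG__ == 4
    #define DECR_INSN "subu"
    #else
    #define DECR_INSN "dsubu"   /* wrongly chosen on such compilers */
    #endif

    int main(void)
    {
            printf("decrement insn chosen: %s\n", DECR_INSN);
            return 0;
    }
]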