From 619b6e18fce20e4b2d0082cde989f37e1be7b3e1 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 23 Oct 2007 12:43:25 +0100 Subject: [MIPS] R4000/R4400 daddiu erratum workaround This complements the generic R4000/R4400 errata workaround code and adds bits for the daddiu problem. In most places it just modifies handwritten assembly code so that the assembler is allowed to use a temporary register as daddiu may now be treated as a macro that expands to a sequence of li and daddu. It is the AT register or, where AT is unavailable or used explicitly for another purpose, an explicitly-named register is selected, using the .set at= feature added recently to gas. This feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the workaround remains disabled, the required version of binutils stays unchanged. Similarly, daddiu instructions put in branch delay slots in noreorder fragments are now taken out of them and the assembler is allowed to reorder them itself as possible (which it does making the whole idea of scheduling them into delay slots manually questionable). Also in the very few places where such a simple conversion was not possible, a handcoded longer sequence is implemented. Other than that there are changes to code responsible for building the TLB fault and page clear/copy handlers to avoid daddiu as appropriate. These are only effective if the erratum is verified to be present at the run time. Finally there is a trivial update to __delay(), because it uses daddiu in a branch delay slot. Signed-off-by: Maciej W. Rozycki Signed-off-by: Ralf Baechle --- arch/mips/lib/memcpy.S | 60 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 13 deletions(-) (limited to 'arch/mips/lib/memcpy.S') diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index a526c62cb76..aded7b15905 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -9,6 +9,7 @@ * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. * Copyright (C) 2002 Broadcom, Inc. * memcpy/copy_user author: Mark Vandevoorde + * Copyright (C) 2007 Maciej W. Rozycki * * Mnemonic names for arguments to memcpy/__copy_user */ @@ -175,7 +176,11 @@ .text .set noreorder +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS .set noat +#else + .set at=v1 +#endif /* * A combined memcpy/__copy_user @@ -271,8 +276,10 @@ EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 4*NBYTES beqz len, done - ADD dst, dst, 4*NBYTES + .set noreorder less_than_4units: /* * rem = len % NBYTES @@ -284,8 +291,10 @@ EXC( LOAD t0, 0(src), l_exc) ADD src, src, NBYTES SUB len, len, NBYTES EXC( STORE t0, 0(dst), s_exc_p1u) + .set reorder /* DADDI_WAR */ + ADD dst, dst, NBYTES bne rem, len, 1b - ADD dst, dst, NBYTES + .set noreorder /* * src and dst are aligned, need to copy rem bytes (rem < NBYTES) @@ -364,8 +373,10 @@ EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 4*NBYTES bne len, rem, 1b - ADD dst, dst, 4*NBYTES + .set noreorder cleanup_src_unaligned: beqz len, done @@ -378,8 +389,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy) ADD src, src, NBYTES SUB len, len, NBYTES EXC( STORE t0, 0(dst), s_exc_p1u) + .set reorder /* DADDI_WAR */ + ADD dst, dst, NBYTES bne len, rem, 1b - ADD dst, dst, NBYTES + .set noreorder copy_bytes_checklen: beqz len, done @@ -427,8 +440,10 @@ l_exc_copy: EXC( lb t1, 0(src), l_exc) ADD src, src, 1 sb t1, 0(dst) # can't fault -- we're copy_from_user + .set reorder /* DADDI_WAR */ + ADD dst, dst, 1 bne src, t0, 1b - ADD dst, dst, 1 + .set noreorder l_exc: LOAD t0, TI_TASK($28) nop @@ -446,20 +461,33 @@ l_exc: * Clear len bytes starting at dst. Can't call __bzero because it * might modify len. An inefficient loop for these rare times... */ + .set reorder /* DADDI_WAR */ + SUB src, len, 1 beqz len, done - SUB src, len, 1 + .set noreorder 1: sb zero, 0(dst) ADD dst, dst, 1 +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS bnez src, 1b SUB src, src, 1 +#else + .set push + .set noat + li v1, 1 + bnez src, 1b + SUB src, src, v1 + .set pop +#endif jr ra nop -#define SEXC(n) \ -s_exc_p ## n ## u: \ - jr ra; \ - ADD len, len, n*NBYTES +#define SEXC(n) \ + .set reorder; /* DADDI_WAR */ \ +s_exc_p ## n ## u: \ + ADD len, len, n*NBYTES; \ + jr ra; \ + .set noreorder SEXC(8) SEXC(7) @@ -471,8 +499,10 @@ SEXC(2) SEXC(1) s_exc_p1: + .set reorder /* DADDI_WAR */ + ADD len, len, 1 jr ra - ADD len, len, 1 + .set noreorder s_exc: jr ra nop @@ -502,8 +532,10 @@ r_end_bytes: SUB a2, a2, 0x1 sb t0, -1(a0) SUB a1, a1, 0x1 + .set reorder /* DADDI_WAR */ + SUB a0, a0, 0x1 bnez a2, r_end_bytes - SUB a0, a0, 0x1 + .set noreorder r_out: jr ra @@ -514,8 +546,10 @@ r_end_bytes_up: SUB a2, a2, 0x1 sb t0, (a0) ADD a1, a1, 0x1 + .set reorder /* DADDI_WAR */ + ADD a0, a0, 0x1 bnez a2, r_end_bytes_up - ADD a0, a0, 0x1 + .set noreorder jr ra move a2, zero -- cgit v1.2.3