From f674ef7b43881b2ac11f98d6ba2dc5d9dd0dd118 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Tue, 22 May 2012 13:54:16 +1000 Subject: lib/raid6: fix test program build drags in headers which are not visible to userspace, thus breaking the build for the test program. Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- lib/raid6/algos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 8b02f60ffc8..f6a0f789916 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -17,11 +17,11 @@ */ #include -#include #ifndef __KERNEL__ #include #include #else +#include #include #if !RAID6_USE_EMPTY_ZERO_PAGE /* In .bss so it's zeroed */ -- cgit v1.2.3 From 048a8b8c89dc427dd7a58527c8923224b1e66d83 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Tue, 22 May 2012 13:54:18 +1000 Subject: lib/raid6: Add SSSE3 optimized recovery functions Add SSSE3 optimized recovery functions, as well as a system for selecting the most appropriate recovery functions to use. Originally-by: H. Peter Anvin Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- lib/raid6/Makefile | 2 +- lib/raid6/algos.c | 37 ++++++ lib/raid6/mktables.c | 25 ++++ lib/raid6/recov.c | 15 ++- lib/raid6/recov_ssse3.c | 335 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 409 insertions(+), 5 deletions(-) create mode 100644 lib/raid6/recov_ssse3.c (limited to 'lib') diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 8a38102770f..de06dfe165b 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o -raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ +raid6_pq-y += algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \ int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \ altivec8.o mmx.o sse1.o sse2.o hostprogs-y += mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index f6a0f789916..5a7f8022be1 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -64,6 +64,20 @@ const struct raid6_calls * const raid6_algos[] = { NULL }; +void (*raid6_2data_recov)(int, size_t, int, int, void **); +EXPORT_SYMBOL_GPL(raid6_2data_recov); + +void (*raid6_datap_recov)(int, size_t, int, void **); +EXPORT_SYMBOL_GPL(raid6_datap_recov); + +const struct raid6_recov_calls *const raid6_recov_algos[] = { +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + &raid6_recov_ssse3, +#endif + &raid6_recov_intx1, + NULL +}; + #ifdef __KERNEL__ #define RAID6_TIME_JIFFIES_LG2 4 #else @@ -72,6 +86,26 @@ const struct raid6_calls * const raid6_algos[] = { #define time_before(x, y) ((x) < (y)) #endif +static inline void raid6_choose_recov(void) +{ + const struct raid6_recov_calls *const *algo; + const struct raid6_recov_calls *best; + + for (best = NULL, algo = raid6_recov_algos; *algo; algo++) + if (!best || (*algo)->priority > best->priority) + if (!(*algo)->valid || (*algo)->valid()) + best = *algo; + + if (best) { + raid6_2data_recov = best->data2; + raid6_datap_recov = best->datap; + + printk("raid6: using %s recovery algorithm\n", best->name); + } else + printk("raid6: Yikes! No recovery algorithm found!\n"); +} + + /* Try to pick the best algorithm */ /* This code uses the gfmul table as convenient data set to abuse */ @@ -141,6 +175,9 @@ int __init raid6_select_algo(void) free_pages((unsigned long)syndromes, 1); + /* select raid recover functions */ + raid6_choose_recov(); + return best ? 0 : -EINVAL; } diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c index 8a3780902ce..39787db588b 100644 --- a/lib/raid6/mktables.c +++ b/lib/raid6/mktables.c @@ -81,6 +81,31 @@ int main(int argc, char *argv[]) printf("EXPORT_SYMBOL(raid6_gfmul);\n"); printf("#endif\n"); + /* Compute vector multiplication table */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_vgfmul[256][32] =\n" + "{\n"); + for (i = 0; i < 256; i++) { + printf("\t{\n"); + for (j = 0; j < 16; j += 8) { + printf("\t\t"); + for (k = 0; k < 8; k++) + printf("0x%02x,%c", gfmul(i, j + k), + (k == 7) ? '\n' : ' '); + } + for (j = 0; j < 16; j += 8) { + printf("\t\t"); + for (k = 0; k < 8; k++) + printf("0x%02x,%c", gfmul(i, (j + k) << 4), + (k == 7) ? '\n' : ' '); + } + printf("\t},\n"); + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_vgfmul);\n"); + printf("#endif\n"); + /* Compute power-of-2 table (exponent) */ v = 1; printf("\nconst u8 __attribute__((aligned(256)))\n" diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index fe275d7b6b3..1805a5cc5da 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -22,7 +22,7 @@ #include /* Recover two failed data blocks. */ -void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, +void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb, void **ptrs) { u8 *p, *q, *dp, *dq; @@ -64,10 +64,9 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, p++; q++; } } -EXPORT_SYMBOL_GPL(raid6_2data_recov); /* Recover failure of one data block plus the P block */ -void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) +void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs) { u8 *p, *q, *dq; const u8 *qmul; /* Q multiplier table */ @@ -96,7 +95,15 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) q++; dq++; } } -EXPORT_SYMBOL_GPL(raid6_datap_recov); + + +const struct raid6_recov_calls raid6_recov_intx1 = { + .data2 = raid6_2data_recov_intx1, + .datap = raid6_datap_recov_intx1, + .valid = NULL, + .name = "intx1", + .priority = 0, +}; #ifndef __KERNEL__ /* Testing only */ diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c new file mode 100644 index 00000000000..37ae6193055 --- /dev/null +++ b/lib/raid6/recov_ssse3.c @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#include +#include "x86.h" + +static int raid6_has_ssse3(void) +{ + return boot_cpu_has(X86_FEATURE_XMM) && + boot_cpu_has(X86_FEATURE_XMM2) && + boot_cpu_has(X86_FEATURE_SSSE3); +} + +void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb, + void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + static const u8 __aligned(16) x0f[16] = { + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f}; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data pages + Use the dead data pages as temporary storage for + delta p and delta q */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks-2] = p; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ + raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0])); + +#ifdef CONFIG_X86_64 + asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0])); + asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0])); + asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16])); +#endif + + /* Now do it... */ + while (bytes) { +#ifdef CONFIG_X86_64 + /* xmm6, xmm14, xmm15 */ + + asm volatile("movdqa %0,%%xmm1" : : "m" (q[0])); + asm volatile("movdqa %0,%%xmm9" : : "m" (q[16])); + asm volatile("movdqa %0,%%xmm0" : : "m" (p[0])); + asm volatile("movdqa %0,%%xmm8" : : "m" (p[16])); + asm volatile("pxor %0,%%xmm1" : : "m" (dq[0])); + asm volatile("pxor %0,%%xmm9" : : "m" (dq[16])); + asm volatile("pxor %0,%%xmm0" : : "m" (dp[0])); + asm volatile("pxor %0,%%xmm8" : : "m" (dp[16])); + + /* xmm0/8 = px */ + + asm volatile("movdqa %xmm6,%xmm4"); + asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16])); + asm volatile("movdqa %xmm6,%xmm12"); + asm volatile("movdqa %xmm5,%xmm13"); + asm volatile("movdqa %xmm1,%xmm3"); + asm volatile("movdqa %xmm9,%xmm11"); + asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */ + asm volatile("movdqa %xmm8,%xmm10"); + asm volatile("psraw $4,%xmm1"); + asm volatile("psraw $4,%xmm9"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm11"); + asm volatile("pand %xmm7,%xmm1"); + asm volatile("pand %xmm7,%xmm9"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm11,%xmm12"); + asm volatile("pshufb %xmm1,%xmm5"); + asm volatile("pshufb %xmm9,%xmm13"); + asm volatile("pxor %xmm4,%xmm5"); + asm volatile("pxor %xmm12,%xmm13"); + + /* xmm5/13 = qx */ + + asm volatile("movdqa %xmm14,%xmm4"); + asm volatile("movdqa %xmm15,%xmm1"); + asm volatile("movdqa %xmm14,%xmm12"); + asm volatile("movdqa %xmm15,%xmm9"); + asm volatile("movdqa %xmm2,%xmm3"); + asm volatile("movdqa %xmm10,%xmm11"); + asm volatile("psraw $4,%xmm2"); + asm volatile("psraw $4,%xmm10"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm11"); + asm volatile("pand %xmm7,%xmm2"); + asm volatile("pand %xmm7,%xmm10"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm11,%xmm12"); + asm volatile("pshufb %xmm2,%xmm1"); + asm volatile("pshufb %xmm10,%xmm9"); + asm volatile("pxor %xmm4,%xmm1"); + asm volatile("pxor %xmm12,%xmm9"); + + /* xmm1/9 = pbmul[px] */ + asm volatile("pxor %xmm5,%xmm1"); + asm volatile("pxor %xmm13,%xmm9"); + /* xmm1/9 = db = DQ */ + asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0])); + asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16])); + + asm volatile("pxor %xmm1,%xmm0"); + asm volatile("pxor %xmm9,%xmm8"); + asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0])); + asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16])); + + bytes -= 32; + p += 32; + q += 32; + dp += 32; + dq += 32; +#else + asm volatile("movdqa %0,%%xmm1" : : "m" (*q)); + asm volatile("movdqa %0,%%xmm0" : : "m" (*p)); + asm volatile("pxor %0,%%xmm1" : : "m" (*dq)); + asm volatile("pxor %0,%%xmm0" : : "m" (*dp)); + + /* 1 = dq ^ q + * 0 = dp ^ p + */ + asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0])); + asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16])); + + asm volatile("movdqa %xmm1,%xmm3"); + asm volatile("psraw $4,%xmm1"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm1"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm1,%xmm5"); + asm volatile("pxor %xmm4,%xmm5"); + + asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */ + + /* xmm5 = qx */ + + asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0])); + asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16])); + asm volatile("movdqa %xmm2,%xmm3"); + asm volatile("psraw $4,%xmm2"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm2"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm2,%xmm1"); + asm volatile("pxor %xmm4,%xmm1"); + + /* xmm1 = pbmul[px] */ + asm volatile("pxor %xmm5,%xmm1"); + /* xmm1 = db = DQ */ + asm volatile("movdqa %%xmm1,%0" : "=m" (*dq)); + + asm volatile("pxor %xmm1,%xmm0"); + asm volatile("movdqa %%xmm0,%0" : "=m" (*dp)); + + bytes -= 16; + p += 16; + q += 16; + dp += 16; + dq += 16; +#endif + } + + kernel_fpu_end(); +} + + +void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + static const u8 __aligned(16) x0f[16] = { + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f}; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data page + Use the dead data page as temporary storage for delta q */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0])); + + while (bytes) { +#ifdef CONFIG_X86_64 + asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0])); + asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16])); + asm volatile("pxor %0, %%xmm3" : : "m" (q[0])); + asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0])); + + /* xmm3 = q[0] ^ dq[0] */ + + asm volatile("pxor %0, %%xmm4" : : "m" (q[16])); + asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16])); + + /* xmm4 = q[16] ^ dq[16] */ + + asm volatile("movdqa %xmm3, %xmm6"); + asm volatile("movdqa %xmm4, %xmm8"); + + /* xmm4 = xmm8 = q[16] ^ dq[16] */ + + asm volatile("psraw $4, %xmm3"); + asm volatile("pand %xmm7, %xmm6"); + asm volatile("pand %xmm7, %xmm3"); + asm volatile("pshufb %xmm6, %xmm0"); + asm volatile("pshufb %xmm3, %xmm1"); + asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0])); + asm volatile("pxor %xmm0, %xmm1"); + asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16])); + + /* xmm1 = qmul[q[0] ^ dq[0]] */ + + asm volatile("psraw $4, %xmm4"); + asm volatile("pand %xmm7, %xmm8"); + asm volatile("pand %xmm7, %xmm4"); + asm volatile("pshufb %xmm8, %xmm10"); + asm volatile("pshufb %xmm4, %xmm11"); + asm volatile("movdqa %0, %%xmm2" : : "m" (p[0])); + asm volatile("pxor %xmm10, %xmm11"); + asm volatile("movdqa %0, %%xmm12" : : "m" (p[16])); + + /* xmm11 = qmul[q[16] ^ dq[16]] */ + + asm volatile("pxor %xmm1, %xmm2"); + + /* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */ + + asm volatile("pxor %xmm11, %xmm12"); + + /* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */ + + asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0])); + asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16])); + + asm volatile("movdqa %%xmm2, %0" : "=m" (p[0])); + asm volatile("movdqa %%xmm12, %0" : "=m" (p[16])); + + bytes -= 32; + p += 32; + q += 32; + dq += 32; + +#else + asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0])); + asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0])); + asm volatile("pxor %0, %%xmm3" : : "m" (q[0])); + asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16])); + + /* xmm3 = *q ^ *dq */ + + asm volatile("movdqa %xmm3, %xmm6"); + asm volatile("movdqa %0, %%xmm2" : : "m" (p[0])); + asm volatile("psraw $4, %xmm3"); + asm volatile("pand %xmm7, %xmm6"); + asm volatile("pand %xmm7, %xmm3"); + asm volatile("pshufb %xmm6, %xmm0"); + asm volatile("pshufb %xmm3, %xmm1"); + asm volatile("pxor %xmm0, %xmm1"); + + /* xmm1 = qmul[*q ^ *dq */ + + asm volatile("pxor %xmm1, %xmm2"); + + /* xmm2 = *p ^ qmul[*q ^ *dq] */ + + asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0])); + asm volatile("movdqa %%xmm2, %0" : "=m" (p[0])); + + bytes -= 16; + p += 16; + q += 16; + dq += 16; +#endif + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_ssse3 = { + .data2 = raid6_2data_recov_ssse3, + .datap = raid6_datap_recov_ssse3, + .valid = raid6_has_ssse3, +#ifdef CONFIG_X86_64 + .name = "ssse3x2", +#else + .name = "ssse3x1", +#endif + .priority = 1, +}; + +#endif -- cgit v1.2.3 From 2dbf708448c836754d25fe6108c5bfe1f5697c95 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Tue, 22 May 2012 13:54:23 +1000 Subject: lib/raid6: update test program for recovery functions Test each combination of recovery and syndrome generation functions. Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- lib/raid6/test/Makefile | 2 +- lib/raid6/test/test.c | 32 +++++++++++++++++++++----------- lib/raid6/x86.h | 15 ++++++++++----- 3 files changed, 32 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index aa651697b6d..c76151d9476 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -23,7 +23,7 @@ RANLIB = ranlib all: raid6.a raid6test raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ - altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \ + altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \ tables.o rm -f $@ $(AR) cq $@ $^ diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 7a930318b17..5a485b7a7d3 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -90,25 +90,35 @@ static int test_disks(int i, int j) int main(int argc, char *argv[]) { const struct raid6_calls *const *algo; + const struct raid6_recov_calls *const *ra; int i, j; int err = 0; makedata(); - for (algo = raid6_algos; *algo; algo++) { - if (!(*algo)->valid || (*algo)->valid()) { - raid6_call = **algo; + for (ra = raid6_recov_algos; *ra; ra++) { + if ((*ra)->valid && !(*ra)->valid()) + continue; + raid6_2data_recov = (*ra)->data2; + raid6_datap_recov = (*ra)->datap; - /* Nuke syndromes */ - memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + printf("using recovery %s\n", (*ra)->name); - /* Generate assumed good syndrome */ - raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, - (void **)&dataptrs); + for (algo = raid6_algos; *algo; algo++) { + if (!(*algo)->valid || (*algo)->valid()) { + raid6_call = **algo; - for (i = 0; i < NDISKS-1; i++) - for (j = i+1; j < NDISKS; j++) - err += test_disks(i, j); + /* Nuke syndromes */ + memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + + /* Generate assumed good syndrome */ + raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, + (void **)&dataptrs); + + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + } } printf("\n"); } diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index cb2a8c91c88..d55d63232c5 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h @@ -35,24 +35,29 @@ static inline void kernel_fpu_end(void) { } +#define __aligned(x) __attribute__((aligned(x))) + #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ #define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions * (fast save and restore) */ #define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ #define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ /* Should work well enough on modern CPUs for testing */ static inline int boot_cpu_has(int flag) { - u32 eax = (flag >> 5) ? 0x80000001 : 1; - u32 edx; + u32 eax = (flag & 0x20) ? 0x80000001 : 1; + u32 ecx, edx; asm volatile("cpuid" - : "+a" (eax), "=d" (edx) - : : "ecx", "ebx"); + : "+a" (eax), "=d" (edx), "=c" (ecx) + : : "ebx"); - return (edx >> (flag & 31)) & 1; + return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1; } #endif /* ndef __KERNEL__ */ -- cgit v1.2.3 From 96e67703e71f4b3cc32b747dbb6158ec74d01e19 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Tue, 22 May 2012 13:54:24 +1000 Subject: lib/raid6: cleanup gen_syndrome function selection Reorders functions in raid6_algos as well as the preference check to reduce the number of functions tested on initialization. Also, creates symmetry between choosing the gen_syndrome functions and choosing the recovery functions. Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- lib/raid6/algos.c | 104 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 57 insertions(+), 47 deletions(-) (limited to 'lib') diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 5a7f8022be1..589f5f50ad2 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -34,10 +34,6 @@ struct raid6_calls raid6_call; EXPORT_SYMBOL_GPL(raid6_call); const struct raid6_calls * const raid6_algos[] = { - &raid6_intx1, - &raid6_intx2, - &raid6_intx4, - &raid6_intx8, #if defined(__ia64__) &raid6_intx16, &raid6_intx32, @@ -61,6 +57,10 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_altivec4, &raid6_altivec8, #endif + &raid6_intx1, + &raid6_intx2, + &raid6_intx4, + &raid6_intx8, NULL }; @@ -86,7 +86,7 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = { #define time_before(x, y) ((x) < (y)) #endif -static inline void raid6_choose_recov(void) +static inline const struct raid6_recov_calls *raid6_choose_recov(void) { const struct raid6_recov_calls *const *algo; const struct raid6_recov_calls *best; @@ -103,62 +103,38 @@ static inline void raid6_choose_recov(void) printk("raid6: using %s recovery algorithm\n", best->name); } else printk("raid6: Yikes! No recovery algorithm found!\n"); -} - -/* Try to pick the best algorithm */ -/* This code uses the gfmul table as convenient data set to abuse */ + return best; +} -int __init raid6_select_algo(void) +static inline const struct raid6_calls *raid6_choose_gen( + void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) { - const struct raid6_calls * const * algo; - const struct raid6_calls * best; - char *syndromes; - void *dptrs[(65536/PAGE_SIZE)+2]; - int i, disks; - unsigned long perf, bestperf; - int bestprefer; - unsigned long j0, j1; - - disks = (65536/PAGE_SIZE)+2; - for ( i = 0 ; i < disks-2 ; i++ ) { - dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; - } - - /* Normal code - use a 2-page allocation to avoid D$ conflict */ - syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); - - if ( !syndromes ) { - printk("raid6: Yikes! No memory available.\n"); - return -ENOMEM; - } - - dptrs[disks-2] = syndromes; - dptrs[disks-1] = syndromes + PAGE_SIZE; + unsigned long perf, bestperf, j0, j1; + const struct raid6_calls *const *algo; + const struct raid6_calls *best; - bestperf = 0; bestprefer = 0; best = NULL; + for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + if (!best || (*algo)->prefer >= best->prefer) { + if ((*algo)->valid && !(*algo)->valid()) + continue; - for ( algo = raid6_algos ; *algo ; algo++ ) { - if ( !(*algo)->valid || (*algo)->valid() ) { perf = 0; preempt_disable(); j0 = jiffies; - while ( (j1 = jiffies) == j0 ) + while ((j1 = jiffies) == j0) cpu_relax(); while (time_before(jiffies, j1 + (1<gen_syndrome(disks, PAGE_SIZE, dptrs); + (*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs); perf++; } preempt_enable(); - if ( (*algo)->prefer > bestprefer || - ((*algo)->prefer == bestprefer && - perf > bestperf) ) { - best = *algo; - bestprefer = best->prefer; + if (perf > bestperf) { bestperf = perf; + best = *algo; } printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); @@ -173,12 +149,46 @@ int __init raid6_select_algo(void) } else printk("raid6: Yikes! No algorithm found!\n"); - free_pages((unsigned long)syndromes, 1); + return best; +} + + +/* Try to pick the best algorithm */ +/* This code uses the gfmul table as convenient data set to abuse */ + +int __init raid6_select_algo(void) +{ + const int disks = (65536/PAGE_SIZE)+2; + + const struct raid6_calls *gen_best; + const struct raid6_recov_calls *rec_best; + char *syndromes; + void *dptrs[(65536/PAGE_SIZE)+2]; + int i; + + for (i = 0; i < disks-2; i++) + dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; + + /* Normal code - use a 2-page allocation to avoid D$ conflict */ + syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); + + if (!syndromes) { + printk("raid6: Yikes! No memory available.\n"); + return -ENOMEM; + } + + dptrs[disks-2] = syndromes; + dptrs[disks-1] = syndromes + PAGE_SIZE; + + /* select raid gen_syndrome function */ + gen_best = raid6_choose_gen(&dptrs, disks); /* select raid recover functions */ - raid6_choose_recov(); + rec_best = raid6_choose_recov(); + + free_pages((unsigned long)syndromes, 1); - return best ? 0 : -EINVAL; + return gen_best && rec_best ? 0 : -EINVAL; } static void raid6_exit(void) -- cgit v1.2.3