author    Ralf Baechle <ralf@linux-mips.org>    2000-11-28 03:58:46 +0000
committer Ralf Baechle <ralf@linux-mips.org>    2000-11-28 03:58:46 +0000
commit    b63ad0882a16a5d28003e57f2b0b81dee3fb322b (patch)
tree      0a343ce219e2b8b38a5d702d66032c57b83d9720 /drivers/md/xor.c
parent    a9d7bff9a84dba79609a0002e5321b74c4d64c64 (diff)
Merge with 2.4.0-test11.
Diffstat (limited to 'drivers/md/xor.c')
-rw-r--r--    drivers/md/xor.c    2721
1 file changed, 68 insertions, 2653 deletions
diff --git a/drivers/md/xor.c b/drivers/md/xor.c
index 4fe04fb89..f58463ebc 100644
--- a/drivers/md/xor.c
+++ b/drivers/md/xor.c
@@ -1,10 +1,10 @@
/*
* xor.c : Multiple Devices driver for Linux
*
- * Copyright (C) 1996, 1997, 1998, 1999 Ingo Molnar, Matti Aarnio, Jakub Jelinek
+ * Copyright (C) 1996, 1997, 1998, 1999, 2000,
+ * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
*
- *
- * optimized RAID-5 checksumming functions.
+ * Dispatch optimized RAID-5 checksumming functions.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -15,2584 +15,66 @@
* (for example /usr/src/linux/COPYING); if not, write to the Free
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/config.h>
#define BH_TRACE 0
#include <linux/module.h>
#include <linux/raid/md.h>
-#ifdef __sparc_v9__
-#include <asm/head.h>
-#include <asm/asi.h>
-#include <asm/visasm.h>
-#endif
-
-/*
- * We use the 'XOR function template' to register multiple xor
- * functions at runtime. The kernel measures their speed upon bootup
- * and decides which one to use. (Compile-time registration is
- * not enough, as certain CPU features like MMX can only be
- * detected at runtime.)
- *
- * This architecture makes it pretty easy to add new routines
- * that are faster on certain CPUs, without killing other CPUs'
- * 'native' routines. Although the current routines are believed
- * to be the physically fastest ones on all CPUs tested, feel
- * free to prove me wrong and add yet another routine =B-)
- * --mingo
- */
-
-#define MAX_XOR_BLOCKS 5
-
-#define XOR_ARGS (unsigned int count, struct buffer_head **bh_ptr)
-
-typedef void (*xor_block_t) XOR_ARGS;
-xor_block_t xor_block = NULL;
-
-#ifndef __sparc_v9__
-
-struct xor_block_template;
-
-struct xor_block_template {
- char * name;
- xor_block_t xor_block;
- int speed;
- struct xor_block_template * next;
-};
-
-struct xor_block_template * xor_functions = NULL;
-
-#define XORBLOCK_TEMPLATE(x) \
-static void xor_block_##x XOR_ARGS; \
-static struct xor_block_template t_xor_block_##x = \
- { #x, xor_block_##x, 0, NULL }; \
-static void xor_block_##x XOR_ARGS
-
-#ifdef __i386__
-
-#ifdef CONFIG_X86_XMM
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-XORBLOCK_TEMPLATE(pIII_kni)
-{
- char xmm_save[16*4];
- int cr0;
- int lines = (bh_ptr[0]->b_size>>8);
-
- __asm__ __volatile__ (
- "movl %%cr0,%0 ;\n\t"
- "clts ;\n\t"
- "movups %%xmm0,(%1) ;\n\t"
- "movups %%xmm1,0x10(%1) ;\n\t"
- "movups %%xmm2,0x20(%1) ;\n\t"
- "movups %%xmm3,0x30(%1) ;\n\t"
- : "=r" (cr0)
- : "r" (xmm_save)
- : "memory" );
-
-#define OFFS(x) "8*("#x"*2)"
-#define PF0(x) \
- " prefetcht0 "OFFS(x)"(%1) ;\n"
-#define LD(x,y) \
- " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
-#define ST(x,y) \
- " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
-#define PF1(x) \
- " prefetchnta "OFFS(x)"(%2) ;\n"
-#define PF2(x) \
- " prefetchnta "OFFS(x)"(%3) ;\n"
-#define PF3(x) \
- " prefetchnta "OFFS(x)"(%4) ;\n"
-#define PF4(x) \
- " prefetchnta "OFFS(x)"(%5) ;\n"
-#define PF5(x) \
- " prefetchnta "OFFS(x)"(%6) ;\n"
-#define XO1(x,y) \
- " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
-#define XO2(x,y) \
- " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
-#define XO3(x,y) \
- " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
-#define XO4(x,y) \
- " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
-#define XO5(x,y) \
- " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
-
- switch(count) {
- case 2:
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- LD(i,0) \
- LD(i+1,1) \
- PF1(i) \
- PF1(i+2) \
- LD(i+2,2) \
- LD(i+3,3) \
- PF0(i+4) \
- PF0(i+6) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- ST(i,0) \
- ST(i+1,1) \
- ST(i+2,2) \
- ST(i+3,3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
-
- :
- : "r" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data)
- : "memory" );
- break;
- case 3:
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i+2) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- PF2(i) \
- PF2(i+2) \
- PF0(i+4) \
- PF0(i+6) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- XO2(i,0) \
- XO2(i+1,1) \
- XO2(i+2,2) \
- XO2(i+3,3) \
- ST(i,0) \
- ST(i+1,1) \
- ST(i+2,2) \
- ST(i+3,3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- :
- : "r" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data),
- "r" (bh_ptr[2]->b_data)
- : "memory" );
- break;
- case 4:
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i+2) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- PF2(i) \
- PF2(i+2) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- PF3(i) \
- PF3(i+2) \
- PF0(i+4) \
- PF0(i+6) \
- XO2(i,0) \
- XO2(i+1,1) \
- XO2(i+2,2) \
- XO2(i+3,3) \
- XO3(i,0) \
- XO3(i+1,1) \
- XO3(i+2,2) \
- XO3(i+3,3) \
- ST(i,0) \
- ST(i+1,1) \
- ST(i+2,2) \
- ST(i+3,3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
+#include <linux/raid/xor.h>
+#include <asm/xor.h>
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " addl $256, %4 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
+/* The xor routines to use. */
+static struct xor_block_template *active_template;
- :
- : "r" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data),
- "r" (bh_ptr[2]->b_data),
- "r" (bh_ptr[3]->b_data)
- : "memory" );
- break;
- case 5:
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i+2) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- PF2(i) \
- PF2(i+2) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- PF3(i) \
- PF3(i+2) \
- XO2(i,0) \
- XO2(i+1,1) \
- XO2(i+2,2) \
- XO2(i+3,3) \
- PF4(i) \
- PF4(i+2) \
- PF0(i+4) \
- PF0(i+6) \
- XO3(i,0) \
- XO3(i+1,1) \
- XO3(i+2,2) \
- XO3(i+3,3) \
- XO4(i,0) \
- XO4(i+1,1) \
- XO4(i+2,2) \
- XO4(i+3,3) \
- ST(i,0) \
- ST(i+1,1) \
- ST(i+2,2) \
- ST(i+3,3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " addl $256, %4 ;\n"
- " addl $256, %5 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
-
- :
- : "r" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data),
- "r" (bh_ptr[2]->b_data),
- "r" (bh_ptr[3]->b_data),
- "r" (bh_ptr[4]->b_data)
- : "memory");
- break;
- }
-
- __asm__ __volatile__ (
- "sfence ;\n\t"
- "movups (%1),%%xmm0 ;\n\t"
- "movups 0x10(%1),%%xmm1 ;\n\t"
- "movups 0x20(%1),%%xmm2 ;\n\t"
- "movups 0x30(%1),%%xmm3 ;\n\t"
- "movl %0,%%cr0 ;\n\t"
- :
- : "r" (cr0), "r" (xmm_save)
- : "memory" );
-}
-
-#undef OFFS
-#undef LD
-#undef ST
-#undef PF0
-#undef PF1
-#undef PF2
-#undef PF3
-#undef PF4
-#undef PF5
-#undef XO1
-#undef XO2
-#undef XO3
-#undef XO4
-#undef XO5
-#undef BLOCK
-
-#endif /* CONFIG_X86_XMM */
-
-/*
- * high-speed RAID5 checksumming functions utilizing MMX instructions
- * Copyright (C) 1998 Ingo Molnar
- */
-XORBLOCK_TEMPLATE(pII_mmx)
+void
+xor_block(unsigned int count, struct buffer_head **bh_ptr)
{
- char fpu_save[108];
- int lines = (bh_ptr[0]->b_size>>7);
-
- if (!(current->flags & PF_USEDFPU))
- __asm__ __volatile__ ( " clts;\n");
-
- __asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) );
-
-#define LD(x,y) \
- " movq 8*("#x")(%1), %%mm"#y" ;\n"
-#define ST(x,y) \
- " movq %%mm"#y", 8*("#x")(%1) ;\n"
-#define XO1(x,y) \
- " pxor 8*("#x")(%2), %%mm"#y" ;\n"
-#define XO2(x,y) \
- " pxor 8*("#x")(%3), %%mm"#y" ;\n"
-#define XO3(x,y) \
- " pxor 8*("#x")(%4), %%mm"#y" ;\n"
-#define XO4(x,y) \
- " pxor 8*("#x")(%5), %%mm"#y" ;\n"
-
- switch(count) {
- case 2:
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- XO1(i,0) \
- ST(i,0) \
- XO1(i+1,1) \
- ST(i+1,1) \
- XO1(i+2,2) \
- ST(i+2,2) \
- XO1(i+3,3) \
- ST(i+3,3)
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
+ unsigned long *p0, *p1, *p2, *p3, *p4;
+ unsigned long bytes = bh_ptr[0]->b_size;
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- :
- : "r" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data)
- : "memory");
- break;
- case 3:
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- XO2(i,0) \
- ST(i,0) \
- XO2(i+1,1) \
- ST(i+1,1) \
- XO2(i+2,2) \
- ST(i+2,2) \
- XO2(i+3,3) \
- ST(i+3,3)
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " addl $128, %3 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- :
- : "r" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data),
- "r" (bh_ptr[2]->b_data)
- : "memory");
- break;
- case 4:
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- XO2(i,0) \
- XO2(i+1,1) \
- XO2(i+2,2) \
- XO2(i+3,3) \
- XO3(i,0) \
- ST(i,0) \
- XO3(i+1,1) \
- ST(i+1,1) \
- XO3(i+2,2) \
- ST(i+2,2) \
- XO3(i+3,3) \
- ST(i+3,3)
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " addl $128, %3 ;\n"
- " addl $128, %4 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- :
- : "r" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data),
- "r" (bh_ptr[2]->b_data),
- "r" (bh_ptr[3]->b_data)
- : "memory");
- break;
- case 5:
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- XO2(i,0) \
- XO2(i+1,1) \
- XO2(i+2,2) \
- XO2(i+3,3) \
- XO3(i,0) \
- XO3(i+1,1) \
- XO3(i+2,2) \
- XO3(i+3,3) \
- XO4(i,0) \
- ST(i,0) \
- XO4(i+1,1) \
- ST(i+1,1) \
- XO4(i+2,2) \
- ST(i+2,2) \
- XO4(i+3,3) \
- ST(i+3,3)
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " addl $128, %3 ;\n"
- " addl $128, %4 ;\n"
- " addl $128, %5 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- :
- : "g" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data),
- "r" (bh_ptr[2]->b_data),
- "r" (bh_ptr[3]->b_data),
- "r" (bh_ptr[4]->b_data)
- : "memory");
- break;
+ p0 = (unsigned long *) bh_ptr[0]->b_data;
+ p1 = (unsigned long *) bh_ptr[1]->b_data;
+ if (count == 2) {
+ active_template->do_2(bytes, p0, p1);
+ return;
}
- __asm__ __volatile__ ( " frstor %0;\n"::"m"(fpu_save[0]) );
-
- if (!(current->flags & PF_USEDFPU))
- stts();
-}
-
-#undef LD
-#undef XO1
-#undef XO2
-#undef XO3
-#undef XO4
-#undef ST
-#undef BLOCK
-
-XORBLOCK_TEMPLATE(p5_mmx)
-{
- char fpu_save[108];
- int lines = (bh_ptr[0]->b_size>>6);
-
- if (!(current->flags & PF_USEDFPU))
- __asm__ __volatile__ ( " clts;\n");
-
- __asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) );
-
- switch(count) {
- case 2:
- __asm__ __volatile__ (
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " movq %%mm0, (%1) ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
-
- :
- : "r" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data)
- : "memory" );
- break;
- case 3:
- __asm__ __volatile__ (
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " pxor (%3), %%mm0 ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " movq %%mm0, (%1) ;\n"
- " pxor 8(%3), %%mm1 ;\n"
- " pxor 16(%3), %%mm2 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " pxor 24(%3), %%mm3 ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " pxor 32(%3), %%mm4 ;\n"
- " pxor 40(%3), %%mm5 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " pxor 48(%3), %%mm6 ;\n"
- " pxor 56(%3), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " addl $64, %3 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
-
- :
- : "r" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data),
- "r" (bh_ptr[2]->b_data)
- : "memory" );
- break;
- case 4:
- __asm__ __volatile__ (
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " pxor (%3), %%mm0 ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " pxor 8(%3), %%mm1 ;\n"
- " pxor (%4), %%mm0 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " pxor 16(%3), %%mm2 ;\n"
- " pxor 8(%4), %%mm1 ;\n"
- " movq %%mm0, (%1) ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " pxor 16(%4), %%mm2 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " pxor 24(%3), %%mm3 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " pxor 32(%3), %%mm4 ;\n"
- " pxor 24(%4), %%mm3 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " pxor 40(%3), %%mm5 ;\n"
- " pxor 32(%4), %%mm4 ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " pxor 40(%4), %%mm5 ;\n"
- " pxor 48(%3), %%mm6 ;\n"
- " pxor 56(%3), %%mm7 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 48(%4), %%mm6 ;\n"
- " pxor 56(%4), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " addl $64, %3 ;\n"
- " addl $64, %4 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
-
- :
- : "r" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data),
- "r" (bh_ptr[2]->b_data),
- "r" (bh_ptr[3]->b_data)
- : "memory" );
- break;
- case 5:
- __asm__ __volatile__ (
-
- " .align 32,0x90 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " pxor (%3), %%mm0 ;\n"
- " pxor 8(%3), %%mm1 ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " pxor (%4), %%mm0 ;\n"
- " pxor 8(%4), %%mm1 ;\n"
- " pxor 16(%3), %%mm2 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " pxor (%5), %%mm0 ;\n"
- " pxor 8(%5), %%mm1 ;\n"
- " movq %%mm0, (%1) ;\n"
- " pxor 16(%4), %%mm2 ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " pxor 16(%5), %%mm2 ;\n"
- " pxor 24(%3), %%mm3 ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 24(%4), %%mm3 ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " pxor 24(%5), %%mm3 ;\n"
- " pxor 32(%3), %%mm4 ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " pxor 32(%4), %%mm4 ;\n"
- " pxor 40(%3), %%mm5 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " pxor 32(%5), %%mm4 ;\n"
- " pxor 40(%4), %%mm5 ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " pxor 48(%3), %%mm6 ;\n"
- " pxor 56(%3), %%mm7 ;\n"
- " pxor 40(%5), %%mm5 ;\n"
- " pxor 48(%4), %%mm6 ;\n"
- " pxor 56(%4), %%mm7 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 48(%5), %%mm6 ;\n"
- " pxor 56(%5), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " addl $64, %3 ;\n"
- " addl $64, %4 ;\n"
- " addl $64, %5 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
-
- :
- : "g" (lines),
- "r" (bh_ptr[0]->b_data),
- "r" (bh_ptr[1]->b_data),
- "r" (bh_ptr[2]->b_data),
- "r" (bh_ptr[3]->b_data),
- "r" (bh_ptr[4]->b_data)
- : "memory" );
- break;
+ p2 = (unsigned long *) bh_ptr[2]->b_data;
+ if (count == 3) {
+ active_template->do_3(bytes, p0, p1, p2);
+ return;
}
- __asm__ __volatile__ ( " frstor %0;\n"::"m"(fpu_save[0]) );
-
- if (!(current->flags & PF_USEDFPU))
- stts();
-}
-#endif /* __i386__ */
-#endif /* !__sparc_v9__ */
-
-#ifdef __sparc_v9__
-/*
- * High speed xor_block operation for RAID4/5 utilizing the
- * UltraSparc Visual Instruction Set.
- *
- * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
- *
- * Requirements:
- * !(((long)dest | (long)sourceN) & (64 - 1)) &&
- * !(len & 127) && len >= 256
- *
- * It is done in pure assembly, as otherwise gcc makes it
- * a non-leaf function, which is not what we want.
- * Also, we don't measure the speeds as on other architectures,
- * as the measuring routine does not take into account cold caches
- * and the fact that xor_block_VIS bypasses the caches.
- * xor_block_32regs might be 5% faster for count 2 if caches are hot
- * and things are just right (for count 3 VIS is about as fast as 32regs
- * for hot caches, and for counts 4 and 5 VIS is always faster by a good
- * margin), but I think it is better not to pollute the caches.
- * Actually, if I'd just fight for speed for hot caches, I could
- * write a hybrid VIS/integer routine, which would do always two
- * 64B blocks in VIS and two in IEUs, but I really care more about
- * caches.
- */
-extern void *VISenter(void);
-extern void xor_block_VIS XOR_ARGS;
-
-void __xor_block_VIS(void)
-{
-__asm__ ("
- .globl xor_block_VIS
-xor_block_VIS:
- ldx [%%o1 + 0], %%o4
- ldx [%%o1 + 8], %%o3
- ldx [%%o4 + %1], %%g5
- ldx [%%o4 + %0], %%o4
- ldx [%%o3 + %0], %%o3
- rd %%fprs, %%o5
- andcc %%o5, %2, %%g0
- be,pt %%icc, 297f
- sethi %%hi(%5), %%g1
- jmpl %%g1 + %%lo(%5), %%g7
- add %%g7, 8, %%g7
-297: wr %%g0, %4, %%fprs
- membar #LoadStore|#StoreLoad|#StoreStore
- sub %%g5, 64, %%g5
- ldda [%%o4] %3, %%f0
- ldda [%%o3] %3, %%f16
- cmp %%o0, 4
- bgeu,pt %%xcc, 10f
- cmp %%o0, 3
- be,pn %%xcc, 13f
- mov -64, %%g1
- sub %%g5, 64, %%g5
- rd %%asi, %%g1
- wr %%g0, %3, %%asi
-
-2: ldda [%%o4 + 64] %%asi, %%f32
- fxor %%f0, %%f16, %%f16
- fxor %%f2, %%f18, %%f18
- fxor %%f4, %%f20, %%f20
- fxor %%f6, %%f22, %%f22
- fxor %%f8, %%f24, %%f24
- fxor %%f10, %%f26, %%f26
- fxor %%f12, %%f28, %%f28
- fxor %%f14, %%f30, %%f30
- stda %%f16, [%%o4] %3
- ldda [%%o3 + 64] %%asi, %%f48
- ldda [%%o4 + 128] %%asi, %%f0
- fxor %%f32, %%f48, %%f48
- fxor %%f34, %%f50, %%f50
- add %%o4, 128, %%o4
- fxor %%f36, %%f52, %%f52
- add %%o3, 128, %%o3
- fxor %%f38, %%f54, %%f54
- subcc %%g5, 128, %%g5
- fxor %%f40, %%f56, %%f56
- fxor %%f42, %%f58, %%f58
- fxor %%f44, %%f60, %%f60
- fxor %%f46, %%f62, %%f62
- stda %%f48, [%%o4 - 64] %%asi
- bne,pt %%xcc, 2b
- ldda [%%o3] %3, %%f16
-
- ldda [%%o4 + 64] %%asi, %%f32
- fxor %%f0, %%f16, %%f16
- fxor %%f2, %%f18, %%f18
- fxor %%f4, %%f20, %%f20
- fxor %%f6, %%f22, %%f22
- fxor %%f8, %%f24, %%f24
- fxor %%f10, %%f26, %%f26
- fxor %%f12, %%f28, %%f28
- fxor %%f14, %%f30, %%f30
- stda %%f16, [%%o4] %3
- ldda [%%o3 + 64] %%asi, %%f48
- membar #Sync
- fxor %%f32, %%f48, %%f48
- fxor %%f34, %%f50, %%f50
- fxor %%f36, %%f52, %%f52
- fxor %%f38, %%f54, %%f54
- fxor %%f40, %%f56, %%f56
- fxor %%f42, %%f58, %%f58
- fxor %%f44, %%f60, %%f60
- fxor %%f46, %%f62, %%f62
- stda %%f48, [%%o4 + 64] %%asi
- membar #Sync|#StoreStore|#StoreLoad
- wr %%g0, 0, %%fprs
- retl
- wr %%g1, %%g0, %%asi
-
-13: ldx [%%o1 + 16], %%o2
- ldx [%%o2 + %0], %%o2
-
-3: ldda [%%o2] %3, %%f32
- fxor %%f0, %%f16, %%f48
- fxor %%f2, %%f18, %%f50
- add %%o4, 64, %%o4
- fxor %%f4, %%f20, %%f52
- fxor %%f6, %%f22, %%f54
- add %%o3, 64, %%o3
- fxor %%f8, %%f24, %%f56
- fxor %%f10, %%f26, %%f58
- fxor %%f12, %%f28, %%f60
- fxor %%f14, %%f30, %%f62
- ldda [%%o4] %3, %%f0
- fxor %%f48, %%f32, %%f48
- fxor %%f50, %%f34, %%f50
- fxor %%f52, %%f36, %%f52
- fxor %%f54, %%f38, %%f54
- add %%o2, 64, %%o2
- fxor %%f56, %%f40, %%f56
- fxor %%f58, %%f42, %%f58
- subcc %%g5, 64, %%g5
- fxor %%f60, %%f44, %%f60
- fxor %%f62, %%f46, %%f62
- stda %%f48, [%%o4 + %%g1] %3
- bne,pt %%xcc, 3b
- ldda [%%o3] %3, %%f16
-
- ldda [%%o2] %3, %%f32
- fxor %%f0, %%f16, %%f48
- fxor %%f2, %%f18, %%f50
- fxor %%f4, %%f20, %%f52
- fxor %%f6, %%f22, %%f54
- fxor %%f8, %%f24, %%f56
- fxor %%f10, %%f26, %%f58
- fxor %%f12, %%f28, %%f60
- fxor %%f14, %%f30, %%f62
- membar #Sync
- fxor %%f48, %%f32, %%f48
- fxor %%f50, %%f34, %%f50
- fxor %%f52, %%f36, %%f52
- fxor %%f54, %%f38, %%f54
- fxor %%f56, %%f40, %%f56
- fxor %%f58, %%f42, %%f58
- fxor %%f60, %%f44, %%f60
- fxor %%f62, %%f46, %%f62
- stda %%f48, [%%o4] %3
- membar #Sync|#StoreStore|#StoreLoad
- retl
- wr %%g0, 0, %%fprs
-
-10: cmp %%o0, 5
- be,pt %%xcc, 15f
- mov -64, %%g1
-
-14: ldx [%%o1 + 16], %%o2
- ldx [%%o1 + 24], %%o0
- ldx [%%o2 + %0], %%o2
- ldx [%%o0 + %0], %%o0
-
-4: ldda [%%o2] %3, %%f32
- fxor %%f0, %%f16, %%f16
- fxor %%f2, %%f18, %%f18
- add %%o4, 64, %%o4
- fxor %%f4, %%f20, %%f20
- fxor %%f6, %%f22, %%f22
- add %%o3, 64, %%o3
- fxor %%f8, %%f24, %%f24
- fxor %%f10, %%f26, %%f26
- fxor %%f12, %%f28, %%f28
- fxor %%f14, %%f30, %%f30
- ldda [%%o0] %3, %%f48
- fxor %%f16, %%f32, %%f32
- fxor %%f18, %%f34, %%f34
- fxor %%f20, %%f36, %%f36
- fxor %%f22, %%f38, %%f38
- add %%o2, 64, %%o2
- fxor %%f24, %%f40, %%f40
- fxor %%f26, %%f42, %%f42
- fxor %%f28, %%f44, %%f44
- fxor %%f30, %%f46, %%f46
- ldda [%%o4] %3, %%f0
- fxor %%f32, %%f48, %%f48
- fxor %%f34, %%f50, %%f50
- fxor %%f36, %%f52, %%f52
- add %%o0, 64, %%o0
- fxor %%f38, %%f54, %%f54
- fxor %%f40, %%f56, %%f56
- fxor %%f42, %%f58, %%f58
- subcc %%g5, 64, %%g5
- fxor %%f44, %%f60, %%f60
- fxor %%f46, %%f62, %%f62
- stda %%f48, [%%o4 + %%g1] %3
- bne,pt %%xcc, 4b
- ldda [%%o3] %3, %%f16
-
- ldda [%%o2] %3, %%f32
- fxor %%f0, %%f16, %%f16
- fxor %%f2, %%f18, %%f18
- fxor %%f4, %%f20, %%f20
- fxor %%f6, %%f22, %%f22
- fxor %%f8, %%f24, %%f24
- fxor %%f10, %%f26, %%f26
- fxor %%f12, %%f28, %%f28
- fxor %%f14, %%f30, %%f30
- ldda [%%o0] %3, %%f48
- fxor %%f16, %%f32, %%f32
- fxor %%f18, %%f34, %%f34
- fxor %%f20, %%f36, %%f36
- fxor %%f22, %%f38, %%f38
- fxor %%f24, %%f40, %%f40
- fxor %%f26, %%f42, %%f42
- fxor %%f28, %%f44, %%f44
- fxor %%f30, %%f46, %%f46
- membar #Sync
- fxor %%f32, %%f48, %%f48
- fxor %%f34, %%f50, %%f50
- fxor %%f36, %%f52, %%f52
- fxor %%f38, %%f54, %%f54
- fxor %%f40, %%f56, %%f56
- fxor %%f42, %%f58, %%f58
- fxor %%f44, %%f60, %%f60
- fxor %%f46, %%f62, %%f62
- stda %%f48, [%%o4] %3
- membar #Sync|#StoreStore|#StoreLoad
- retl
- wr %%g0, 0, %%fprs
-
-15: ldx [%%o1 + 16], %%o2
- ldx [%%o1 + 24], %%o0
- ldx [%%o1 + 32], %%o1
- ldx [%%o2 + %0], %%o2
- ldx [%%o0 + %0], %%o0
- ldx [%%o1 + %0], %%o1
-
-5: ldda [%%o2] %3, %%f32
- fxor %%f0, %%f16, %%f48
- fxor %%f2, %%f18, %%f50
- add %%o4, 64, %%o4
- fxor %%f4, %%f20, %%f52
- fxor %%f6, %%f22, %%f54
- add %%o3, 64, %%o3
- fxor %%f8, %%f24, %%f56
- fxor %%f10, %%f26, %%f58
- fxor %%f12, %%f28, %%f60
- fxor %%f14, %%f30, %%f62
- ldda [%%o0] %3, %%f16
- fxor %%f48, %%f32, %%f48
- fxor %%f50, %%f34, %%f50
- fxor %%f52, %%f36, %%f52
- fxor %%f54, %%f38, %%f54
- add %%o2, 64, %%o2
- fxor %%f56, %%f40, %%f56
- fxor %%f58, %%f42, %%f58
- fxor %%f60, %%f44, %%f60
- fxor %%f62, %%f46, %%f62
- ldda [%%o1] %3, %%f32
- fxor %%f48, %%f16, %%f48
- fxor %%f50, %%f18, %%f50
- add %%o0, 64, %%o0
- fxor %%f52, %%f20, %%f52
- fxor %%f54, %%f22, %%f54
- add %%o1, 64, %%o1
- fxor %%f56, %%f24, %%f56
- fxor %%f58, %%f26, %%f58
- fxor %%f60, %%f28, %%f60
- fxor %%f62, %%f30, %%f62
- ldda [%%o4] %3, %%f0
- fxor %%f48, %%f32, %%f48
- fxor %%f50, %%f34, %%f50
- fxor %%f52, %%f36, %%f52
- fxor %%f54, %%f38, %%f54
- fxor %%f56, %%f40, %%f56
- fxor %%f58, %%f42, %%f58
- subcc %%g5, 64, %%g5
- fxor %%f60, %%f44, %%f60
- fxor %%f62, %%f46, %%f62
- stda %%f48, [%%o4 + %%g1] %3
- bne,pt %%xcc, 5b
- ldda [%%o3] %3, %%f16
-
- ldda [%%o2] %3, %%f32
- fxor %%f0, %%f16, %%f48
- fxor %%f2, %%f18, %%f50
- fxor %%f4, %%f20, %%f52
- fxor %%f6, %%f22, %%f54
- fxor %%f8, %%f24, %%f56
- fxor %%f10, %%f26, %%f58
- fxor %%f12, %%f28, %%f60
- fxor %%f14, %%f30, %%f62
- ldda [%%o0] %3, %%f16
- fxor %%f48, %%f32, %%f48
- fxor %%f50, %%f34, %%f50
- fxor %%f52, %%f36, %%f52
- fxor %%f54, %%f38, %%f54
- fxor %%f56, %%f40, %%f56
- fxor %%f58, %%f42, %%f58
- fxor %%f60, %%f44, %%f60
- fxor %%f62, %%f46, %%f62
- ldda [%%o1] %3, %%f32
- fxor %%f48, %%f16, %%f48
- fxor %%f50, %%f18, %%f50
- fxor %%f52, %%f20, %%f52
- fxor %%f54, %%f22, %%f54
- fxor %%f56, %%f24, %%f56
- fxor %%f58, %%f26, %%f58
- fxor %%f60, %%f28, %%f60
- fxor %%f62, %%f30, %%f62
- membar #Sync
- fxor %%f48, %%f32, %%f48
- fxor %%f50, %%f34, %%f50
- fxor %%f52, %%f36, %%f52
- fxor %%f54, %%f38, %%f54
- fxor %%f56, %%f40, %%f56
- fxor %%f58, %%f42, %%f58
- fxor %%f60, %%f44, %%f60
- fxor %%f62, %%f46, %%f62
- stda %%f48, [%%o4] %3
- membar #Sync|#StoreStore|#StoreLoad
- retl
- wr %%g0, 0, %%fprs
- " : :
- "i" (&((struct buffer_head *)0)->b_data),
- "i" (&((struct buffer_head *)0)->b_size),
- "i" (FPRS_FEF|FPRS_DU), "i" (ASI_BLK_P),
- "i" (FPRS_FEF), "i" (VISenter));
-}
-#endif /* __sparc_v9__ */
-
-#if defined(__sparc__) && !defined(__sparc_v9__)
-/*
- * High speed xor_block operation for RAID4/5 utilizing the
- * ldd/std SPARC instructions.
- *
- * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
- *
- */
-
-XORBLOCK_TEMPLATE(SPARC)
-{
- int size = bh_ptr[0]->b_size;
- int lines = size / (sizeof (long)) / 8, i;
- long *destp = (long *) bh_ptr[0]->b_data;
- long *source1 = (long *) bh_ptr[1]->b_data;
- long *source2, *source3, *source4;
-
- switch (count) {
- case 2:
- for (i = lines; i > 0; i--) {
- __asm__ __volatile__("
- ldd [%0 + 0x00], %%g2
- ldd [%0 + 0x08], %%g4
- ldd [%0 + 0x10], %%o0
- ldd [%0 + 0x18], %%o2
- ldd [%1 + 0x00], %%o4
- ldd [%1 + 0x08], %%l0
- ldd [%1 + 0x10], %%l2
- ldd [%1 + 0x18], %%l4
- xor %%g2, %%o4, %%g2
- xor %%g3, %%o5, %%g3
- xor %%g4, %%l0, %%g4
- xor %%g5, %%l1, %%g5
- xor %%o0, %%l2, %%o0
- xor %%o1, %%l3, %%o1
- xor %%o2, %%l4, %%o2
- xor %%o3, %%l5, %%o3
- std %%g2, [%0 + 0x00]
- std %%g4, [%0 + 0x08]
- std %%o0, [%0 + 0x10]
- std %%o2, [%0 + 0x18]
- " : : "r" (destp), "r" (source1) : "g2", "g3", "g4", "g5", "o0",
- "o1", "o2", "o3", "o4", "o5", "l0", "l1", "l2", "l3", "l4", "l5");
- destp += 8;
- source1 += 8;
- }
- break;
- case 3:
- source2 = (long *) bh_ptr[2]->b_data;
- for (i = lines; i > 0; i--) {
- __asm__ __volatile__("
- ldd [%0 + 0x00], %%g2
- ldd [%0 + 0x08], %%g4
- ldd [%0 + 0x10], %%o0
- ldd [%0 + 0x18], %%o2
- ldd [%1 + 0x00], %%o4
- ldd [%1 + 0x08], %%l0
- ldd [%1 + 0x10], %%l2
- ldd [%1 + 0x18], %%l4
- xor %%g2, %%o4, %%g2
- xor %%g3, %%o5, %%g3
- ldd [%2 + 0x00], %%o4
- xor %%g4, %%l0, %%g4
- xor %%g5, %%l1, %%g5
- ldd [%2 + 0x08], %%l0
- xor %%o0, %%l2, %%o0
- xor %%o1, %%l3, %%o1
- ldd [%2 + 0x10], %%l2
- xor %%o2, %%l4, %%o2
- xor %%o3, %%l5, %%o3
- ldd [%2 + 0x18], %%l4
- xor %%g2, %%o4, %%g2
- xor %%g3, %%o5, %%g3
- xor %%g4, %%l0, %%g4
- xor %%g5, %%l1, %%g5
- xor %%o0, %%l2, %%o0
- xor %%o1, %%l3, %%o1
- xor %%o2, %%l4, %%o2
- xor %%o3, %%l5, %%o3
- std %%g2, [%0 + 0x00]
- std %%g4, [%0 + 0x08]
- std %%o0, [%0 + 0x10]
- std %%o2, [%0 + 0x18]
- " : : "r" (destp), "r" (source1), "r" (source2)
- : "g2", "g3", "g4", "g5", "o0", "o1", "o2", "o3", "o4", "o5",
- "l0", "l1", "l2", "l3", "l4", "l5");
- destp += 8;
- source1 += 8;
- source2 += 8;
- }
- break;
- case 4:
- source2 = (long *) bh_ptr[2]->b_data;
- source3 = (long *) bh_ptr[3]->b_data;
- for (i = lines; i > 0; i--) {
- __asm__ __volatile__("
- ldd [%0 + 0x00], %%g2
- ldd [%0 + 0x08], %%g4
- ldd [%0 + 0x10], %%o0
- ldd [%0 + 0x18], %%o2
- ldd [%1 + 0x00], %%o4
- ldd [%1 + 0x08], %%l0
- ldd [%1 + 0x10], %%l2
- ldd [%1 + 0x18], %%l4
- xor %%g2, %%o4, %%g2
- xor %%g3, %%o5, %%g3
- ldd [%2 + 0x00], %%o4
- xor %%g4, %%l0, %%g4
- xor %%g5, %%l1, %%g5
- ldd [%2 + 0x08], %%l0
- xor %%o0, %%l2, %%o0
- xor %%o1, %%l3, %%o1
- ldd [%2 + 0x10], %%l2
- xor %%o2, %%l4, %%o2
- xor %%o3, %%l5, %%o3
- ldd [%2 + 0x18], %%l4
- xor %%g2, %%o4, %%g2
- xor %%g3, %%o5, %%g3
- ldd [%3 + 0x00], %%o4
- xor %%g4, %%l0, %%g4
- xor %%g5, %%l1, %%g5
- ldd [%3 + 0x08], %%l0
- xor %%o0, %%l2, %%o0
- xor %%o1, %%l3, %%o1
- ldd [%3 + 0x10], %%l2
- xor %%o2, %%l4, %%o2
- xor %%o3, %%l5, %%o3
- ldd [%3 + 0x18], %%l4
- xor %%g2, %%o4, %%g2
- xor %%g3, %%o5, %%g3
- xor %%g4, %%l0, %%g4
- xor %%g5, %%l1, %%g5
- xor %%o0, %%l2, %%o0
- xor %%o1, %%l3, %%o1
- xor %%o2, %%l4, %%o2
- xor %%o3, %%l5, %%o3
- std %%g2, [%0 + 0x00]
- std %%g4, [%0 + 0x08]
- std %%o0, [%0 + 0x10]
- std %%o2, [%0 + 0x18]
- " : : "r" (destp), "r" (source1), "r" (source2), "r" (source3)
- : "g2", "g3", "g4", "g5", "o0", "o1", "o2", "o3", "o4", "o5",
- "l0", "l1", "l2", "l3", "l4", "l5");
- destp += 8;
- source1 += 8;
- source2 += 8;
- source3 += 8;
- }
- break;
- case 5:
- source2 = (long *) bh_ptr[2]->b_data;
- source3 = (long *) bh_ptr[3]->b_data;
- source4 = (long *) bh_ptr[4]->b_data;
- for (i = lines; i > 0; i--) {
- __asm__ __volatile__("
- ldd [%0 + 0x00], %%g2
- ldd [%0 + 0x08], %%g4
- ldd [%0 + 0x10], %%o0
- ldd [%0 + 0x18], %%o2
- ldd [%1 + 0x00], %%o4
- ldd [%1 + 0x08], %%l0
- ldd [%1 + 0x10], %%l2
- ldd [%1 + 0x18], %%l4
- xor %%g2, %%o4, %%g2
- xor %%g3, %%o5, %%g3
- ldd [%2 + 0x00], %%o4
- xor %%g4, %%l0, %%g4
- xor %%g5, %%l1, %%g5
- ldd [%2 + 0x08], %%l0
- xor %%o0, %%l2, %%o0
- xor %%o1, %%l3, %%o1
- ldd [%2 + 0x10], %%l2
- xor %%o2, %%l4, %%o2
- xor %%o3, %%l5, %%o3
- ldd [%2 + 0x18], %%l4
- xor %%g2, %%o4, %%g2
- xor %%g3, %%o5, %%g3
- ldd [%3 + 0x00], %%o4
- xor %%g4, %%l0, %%g4
- xor %%g5, %%l1, %%g5
- ldd [%3 + 0x08], %%l0
- xor %%o0, %%l2, %%o0
- xor %%o1, %%l3, %%o1
- ldd [%3 + 0x10], %%l2
- xor %%o2, %%l4, %%o2
- xor %%o3, %%l5, %%o3
- ldd [%3 + 0x18], %%l4
- xor %%g2, %%o4, %%g2
- xor %%g3, %%o5, %%g3
- ldd [%4 + 0x00], %%o4
- xor %%g4, %%l0, %%g4
- xor %%g5, %%l1, %%g5
- ldd [%4 + 0x08], %%l0
- xor %%o0, %%l2, %%o0
- xor %%o1, %%l3, %%o1
- ldd [%4 + 0x10], %%l2
- xor %%o2, %%l4, %%o2
- xor %%o3, %%l5, %%o3
- ldd [%4 + 0x18], %%l4
- xor %%g2, %%o4, %%g2
- xor %%g3, %%o5, %%g3
- xor %%g4, %%l0, %%g4
- xor %%g5, %%l1, %%g5
- xor %%o0, %%l2, %%o0
- xor %%o1, %%l3, %%o1
- xor %%o2, %%l4, %%o2
- xor %%o3, %%l5, %%o3
- std %%g2, [%0 + 0x00]
- std %%g4, [%0 + 0x08]
- std %%o0, [%0 + 0x10]
- std %%o2, [%0 + 0x18]
- " : : "r" (destp), "r" (source1), "r" (source2), "r" (source3), "r" (source4)
- : "g2", "g3", "g4", "g5", "o0", "o1", "o2", "o3", "o4", "o5",
- "l0", "l1", "l2", "l3", "l4", "l5");
- destp += 8;
- source1 += 8;
- source2 += 8;
- source3 += 8;
- source4 += 8;
- }
- break;
+ p3 = (unsigned long *) bh_ptr[3]->b_data;
+ if (count == 4) {
+ active_template->do_4(bytes, p0, p1, p2, p3);
+ return;
}
-}
-#endif /* __sparc_v[78]__ */
-
-#ifdef __alpha__
-/*
- * High speed xor_block operation for RAID4/5 pipelined for Alpha EV5.
- * There is a second version using EV6 prefetch instructions.
- *
- * Copyright (C) 2000 Richard Henderson (rth@redhat.com)
- */
-
-XORBLOCK_TEMPLATE(alpha)
-{
- long lines = bh_ptr[0]->b_size / sizeof (long) / 8;
- long *d = (long *) bh_ptr[0]->b_data;
- long *s1 = (long *) bh_ptr[1]->b_data;
- long *s2, *s3, *s4;
-
- if (count == 2) goto two_blocks;
-
- s2 = (long *) bh_ptr[2]->b_data;
- if (count == 3) goto three_blocks;
-
- s3 = (long *) bh_ptr[3]->b_data;
- if (count == 4) goto four_blocks;
-
- s4 = (long *) bh_ptr[4]->b_data;
- goto five_blocks;
-
-two_blocks:
-asm volatile ("
- .align 4
-2:
- ldq $0,0(%0)
- ldq $1,0(%1)
- ldq $2,8(%0)
- ldq $3,8(%1)
-
- ldq $4,16(%0)
- ldq $5,16(%1)
- ldq $6,24(%0)
- ldq $7,24(%1)
-
- ldq $16,32(%0)
- ldq $17,32(%1)
- ldq $18,40(%0)
- ldq $19,40(%1)
-
- ldq $20,48(%0)
- ldq $21,48(%1)
- ldq $22,56(%0)
- xor $0,$1,$0 # 7 cycles from $1 load
-
- ldq $23,56(%1)
- xor $2,$3,$2
- stq $0,0(%0)
- xor $4,$5,$4
-
- stq $2,8(%0)
- xor $6,$7,$6
- stq $4,16(%0)
- xor $16,$17,$16
-
- stq $6,24(%0)
- xor $18,$19,$18
- stq $16,32(%0)
- xor $20,$21,$20
-
- stq $18,40(%0)
- xor $22,$23,$22
- stq $20,48(%0)
- subq %2,1,%2
-
- stq $22,56(%0)
- addq %0,64,%0
- addq %1,64,%1
- bgt %2,2b"
- : "=r"(d), "=r"(s1), "=r"(lines)
- : "0"(d), "1"(s1), "2"(lines)
- : "memory", "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
- "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
- return;
-
-three_blocks:
-asm volatile ("
- .align 4
-3:
- ldq $0,0(%0)
- ldq $1,0(%1)
- ldq $2,0(%2)
- ldq $3,8(%0)
-
- ldq $4,8(%1)
- ldq $6,16(%0)
- ldq $7,16(%1)
- ldq $17,24(%0)
-
- ldq $18,24(%1)
- ldq $20,32(%0)
- ldq $21,32(%1)
- ldq $5,8(%2)
-
- ldq $16,16(%2)
- ldq $19,24(%2)
- ldq $22,32(%2)
- nop
-
- xor $0,$1,$1 # 8 cycles from $0 load
- xor $3,$4,$4 # 6 cycles from $4 load
- xor $6,$7,$7 # 6 cycles from $7 load
- xor $17,$18,$18 # 5 cycles from $18 load
-
- xor $1,$2,$2 # 9 cycles from $2 load
- xor $20,$21,$21 # 5 cycles from $21 load
- stq $2,0(%0)
- xor $4,$5,$5 # 6 cycles from $5 load
-
- stq $5,8(%0)
- xor $7,$16,$16 # 7 cycles from $16 load
- stq $16,16(%0)
- xor $18,$19,$19 # 7 cycles from $19 load
-
- stq $19,24(%0)
- xor $21,$22,$22 # 7 cycles from $22 load
- stq $22,32(%0)
- nop
-
- ldq $0,40(%0)
- ldq $1,40(%1)
- ldq $3,48(%0)
- ldq $4,48(%1)
-
- ldq $6,56(%0)
- ldq $7,56(%1)
- ldq $2,40(%2)
- ldq $5,48(%2)
-
- ldq $16,56(%2)
- xor $0,$1,$1 # 4 cycles from $1 load
- xor $3,$4,$4 # 5 cycles from $4 load
- xor $6,$7,$7 # 5 cycles from $7 load
-
- xor $1,$2,$2 # 4 cycles from $2 load
- xor $4,$5,$5 # 5 cycles from $5 load
- stq $2,40(%0)
- xor $7,$16,$16 # 4 cycles from $16 load
-
- stq $5,48(%0)
- subq %3,1,%3
- stq $16,56(%0)
- addq %2,64,%2
-
- addq %1,64,%1
- addq %0,64,%0
- bgt %3,3b"
- : "=r"(d), "=r"(s1), "=r"(s2), "=r"(lines)
- : "0"(d), "1"(s1), "2"(s2), "3"(lines)
- : "memory", "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
- "$16", "$17", "$18", "$19", "$20", "$21", "$22");
- return;
-
-four_blocks:
-asm volatile ("
- .align 4
-4:
- ldq $0,0(%0)
- ldq $1,0(%1)
- ldq $2,0(%2)
- ldq $3,0(%3)
-
- ldq $4,8(%0)
- ldq $5,8(%1)
- ldq $6,8(%2)
- ldq $7,8(%3)
- ldq $16,16(%0)
- ldq $17,16(%1)
- ldq $18,16(%2)
- ldq $19,16(%3)
-
- ldq $20,24(%0)
- xor $0,$1,$1 # 6 cycles from $1 load
- ldq $21,24(%1)
- xor $2,$3,$3 # 6 cycles from $3 load
-
- ldq $0,24(%2)
- xor $1,$3,$3
- ldq $1,24(%3)
- xor $4,$5,$5 # 7 cycles from $5 load
-
- stq $3,0(%0)
- xor $6,$7,$7
- xor $16,$17,$17 # 7 cycles from $17 load
- xor $5,$7,$7
-
- stq $7,8(%0)
- xor $18,$19,$19 # 7 cycles from $19 load
- ldq $2,32(%0)
- xor $17,$19,$19
-
- ldq $3,32(%1)
- ldq $4,32(%2)
- ldq $5,32(%3)
- xor $20,$21,$21 # 8 cycles from $21 load
-
- ldq $6,40(%0)
- ldq $7,40(%1)
- ldq $16,40(%2)
- ldq $17,40(%3)
-
- stq $19,16(%0)
- xor $0,$1,$1 # 9 cycles from $1 load
- xor $2,$3,$3 # 5 cycles from $3 load
- xor $21,$1,$1
-
- ldq $18,48(%0)
- xor $4,$5,$5 # 5 cycles from $5 load
- ldq $19,48(%1)
- xor $3,$5,$5
-
- ldq $20,48(%2)
- ldq $21,48(%3)
- ldq $0,56(%0)
- ldq $1,56(%1)
-
- ldq $2,56(%2)
- xor $6,$7,$7 # 8 cycles from $6 load
- ldq $3,56(%3)
- xor $16,$17,$17 # 8 cycles from $17 load
-
- xor $7,$17,$17
- xor $18,$19,$19 # 5 cycles from $19 load
- xor $20,$21,$21 # 5 cycles from $21 load
- xor $19,$21,$21
-
- stq $1,24(%0)
- xor $0,$1,$1 # 5 cycles from $1 load
- stq $5,32(%0)
- xor $2,$3,$3 # 4 cycles from $3 load
-
- stq $17,40(%0)
- xor $1,$3,$3
- stq $21,48(%0)
- subq %4,1,%4
-
- stq $3,56(%0)
- addq %3,64,%3
- addq %2,64,%2
- addq %1,64,%1
-
- addq %0,64,%0
- bgt %4,4b"
- : "=r"(d), "=r"(s1), "=r"(s2), "=r"(s3), "=r"(lines)
- : "0"(d), "1"(s1), "2"(s2), "3"(s3), "4"(lines)
- : "memory", "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
- "$16", "$17", "$18", "$19", "$20", "$21");
- return;
-
-five_blocks:
-asm volatile ("
- ldq %0,0(%6)
- ldq %1,8(%6)
- ldq %2,16(%6)
- ldq %3,24(%6)
- ldq %4,32(%6)
- ldq %0,%7(%0)
- ldq %1,%7(%1)
- ldq %2,%7(%2)
- ldq %3,%7(%3)
- ldq %4,%7(%4)
- .align 4
-5:
- ldq $0,0(%0)
- ldq $1,0(%1)
- ldq $2,0(%2)
- ldq $3,0(%3)
-
- ldq $4,0(%4)
- ldq $5,8(%0)
- ldq $6,8(%1)
- ldq $7,8(%2)
-
- ldq $16,8(%3)
- ldq $17,8(%4)
- ldq $18,16(%0)
- ldq $19,16(%1)
-
- ldq $20,16(%2)
- xor $0,$1,$1 # 6 cycles from $1 load
- ldq $21,16(%3)
- xor $2,$3,$3 # 6 cycles from $3 load
-
- ldq $0,16(%4)
- xor $1,$3,$3
- ldq $1,24(%0)
- xor $3,$4,$4 # 7 cycles from $4 load
-
- stq $4,0(%0)
- xor $5,$6,$6 # 7 cycles from $6 load
- xor $7,$16,$16 # 7 cycles from $16 load
- xor $6,$17,$17 # 7 cycles from $17 load
-
- ldq $2,24(%1)
- xor $16,$17,$17
- ldq $3,24(%2)
- xor $18,$19,$19 # 8 cycles from $19 load
-
- stq $17,8(%0)
- xor $19,$20,$20 # 8 cycles from $20 load
- ldq $4,24(%3)
- xor $21,$0,$0 # 7 cycles from $0 load
-
- ldq $5,24(%4)
- xor $20,$0,$0
- ldq $6,32(%0)
- ldq $7,32(%1)
-
- stq $0,16(%0)
- xor $1,$2,$2 # 6 cycles from $2 load
- ldq $16,32(%2)
- xor $3,$4,$4 # 4 cycles from $4 load
-
- ldq $17,32(%3)
- xor $2,$4,$4
- ldq $18,32(%4)
- ldq $19,40(%0)
-
- ldq $20,40(%1)
- ldq $21,40(%2)
- ldq $0,40(%3)
- xor $4,$5,$5 # 7 cycles from $5 load
-
- stq $5,24(%0)
- xor $6,$7,$7 # 7 cycles from $7 load
- ldq $1,40(%4)
- ldq $2,48(%0)
-
- ldq $3,48(%1)
- xor $7,$16,$16 # 7 cycles from $16 load
- ldq $4,48(%2)
- xor $17,$18,$18 # 6 cycles from $18 load
-
- ldq $5,48(%3)
- xor $16,$18,$18
- ldq $6,48(%4)
- xor $19,$20,$20 # 7 cycles from $20 load
-
- stq $18,32(%0)
- xor $20,$21,$21 # 8 cycles from $21 load
- ldq $7,56(%0)
- xor $0,$1,$1 # 6 cycles from $1 load
-
- ldq $16,56(%1)
- ldq $17,56(%2)
- ldq $18,56(%3)
- ldq $19,56(%4)
-
- xor $21,$1,$1
- xor $2,$3,$3 # 9 cycles from $3 load
- xor $3,$4,$4 # 9 cycles from $4 load
- xor $5,$6,$6 # 8 cycles from $6 load
-
- unop
- xor $4,$6,$6
- xor $7,$16,$16 # 7 cycles from $16 load
- xor $17,$18,$18 # 6 cycles from $18 load
-
- stq $6,48(%0)
- xor $16,$18,$18
- subq %5,1,%5
- xor $18,$19,$19 # 8 cycles from $19 load
-
- stq $19,56(%0)
- addq %4,64,%4
- addq %3,64,%3
- addq %2,64,%2
-
- addq %1,64,%1
- addq %0,64,%0
- bgt %5,5b"
- : "=&r"(d), "=&r"(s1), "=&r"(s2), "=&r"(s3), "=r"(s4), "=r"(lines)
- /* ARG! We've run out of asm arguments! We've got to reload
- all those pointers we just loaded. */
- : "r"(bh_ptr), "i" (&((struct buffer_head *)0)->b_data), "5"(lines)
- : "memory", "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
- "$16", "$17", "$18", "$19", "$20", "$21");
- return;
+ p4 = (unsigned long *) bh_ptr[4]->b_data;
+ active_template->do_5(bytes, p0, p1, p2, p3, p4);
}
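
The replacement xor_block() above is now a pure dispatcher: it unpacks the
buffer pointers once and hands off to whichever routine won the boot-time
benchmark. The template it dispatches through lives in the new
<linux/raid/xor.h>; the following is a sketch reconstructed from the calls
above (field order and const-ness are assumptions, not quoted from that
header):

    struct xor_block_template {
        struct xor_block_template *next;  /* links the benchmark list   */
        const char *name;                 /* printed by do_xor_speed()  */
        int speed;                        /* measured at boot           */
        void (*do_2)(unsigned long bytes, unsigned long *p0,
                     unsigned long *p1);
        void (*do_3)(unsigned long, unsigned long *, unsigned long *,
                     unsigned long *);
        void (*do_4)(unsigned long, unsigned long *, unsigned long *,
                     unsigned long *, unsigned long *);
        void (*do_5)(unsigned long, unsigned long *, unsigned long *,
                     unsigned long *, unsigned long *, unsigned long *);
    };

Note that bh_ptr[0] doubles as the destination, which is why each do_N
takes the byte count followed by exactly count pointers.
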
-#define prefetch(base, ofs) \
- asm("ldq $31,%2(%0)" : "=r"(base) : "0"(base), "i"(ofs))
-
-XORBLOCK_TEMPLATE(alpha_prefetch)
-{
- long lines = bh_ptr[0]->b_size / sizeof (long) / 8;
- long *d = (long *) bh_ptr[0]->b_data;
- long *s1 = (long *) bh_ptr[1]->b_data;
- long *s2, *s3, *s4;
- long p;
-
- p = count == 2;
- prefetch(d, 0);
- prefetch(s1, 0);
- prefetch(d, 64);
- prefetch(s1, 64);
- prefetch(d, 128);
- prefetch(s1, 128);
- prefetch(d, 192);
- prefetch(s1, 192);
- if (p) goto two_blocks;
-
- s2 = (long *) bh_ptr[2]->b_data;
- p = count == 3;
- prefetch(s2, 0);
- prefetch(s2, 64);
- prefetch(s2, 128);
- prefetch(s2, 192);
- if (p) goto three_blocks;
-
- s3 = (long *) bh_ptr[3]->b_data;
- p = count == 4;
- prefetch(s3, 0);
- prefetch(s3, 64);
- prefetch(s3, 128);
- prefetch(s3, 192);
- if (p) goto four_blocks;
-
- s4 = (long *) bh_ptr[4]->b_data;
- prefetch(s4, 0);
- prefetch(s4, 64);
- prefetch(s4, 128);
- prefetch(s4, 192);
- goto five_blocks;
-
-two_blocks:
-asm volatile ("
- .align 4
-2:
- ldq $0,0(%0)
- ldq $1,0(%1)
- ldq $2,8(%0)
- ldq $3,8(%1)
-
- ldq $4,16(%0)
- ldq $5,16(%1)
- ldq $6,24(%0)
- ldq $7,24(%1)
-
- ldq $16,32(%0)
- ldq $17,32(%1)
- ldq $18,40(%0)
- ldq $19,40(%1)
-
- ldq $20,48(%0)
- ldq $21,48(%1)
- ldq $22,56(%0)
- ldq $23,56(%1)
-
- ldq $31,256(%0)
- xor $0,$1,$0 # 8 cycles from $1 load
- ldq $31,256(%1)
- xor $2,$3,$2
-
- stq $0,0(%0)
- xor $4,$5,$4
- stq $2,8(%0)
- xor $6,$7,$6
-
- stq $4,16(%0)
- xor $16,$17,$16
- stq $6,24(%0)
- xor $18,$19,$18
-
- stq $16,32(%0)
- xor $20,$21,$20
- stq $18,40(%0)
- xor $22,$23,$22
-
- stq $20,48(%0)
- subq %2,1,%2
- stq $22,56(%0)
- addq %0,64,%0
-
- addq %1,64,%1
- bgt %2,2b"
- : "=r"(d), "=r"(s1), "=r"(lines)
- : "0"(d), "1"(s1), "2"(lines)
- : "memory", "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
- "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
- return;
-
-three_blocks:
-asm volatile ("
- .align 4
-3:
- ldq $0,0(%0)
- ldq $1,0(%1)
- ldq $2,0(%2)
- ldq $3,8(%0)
-
- ldq $4,8(%1)
- ldq $6,16(%0)
- ldq $7,16(%1)
- ldq $17,24(%0)
-
- ldq $18,24(%1)
- ldq $20,32(%0)
- ldq $21,32(%1)
- ldq $5,8(%2)
-
- ldq $16,16(%2)
- ldq $19,24(%2)
- ldq $22,32(%2)
- nop
-
- xor $0,$1,$1 # 8 cycles from $0 load
- xor $3,$4,$4 # 7 cycles from $4 load
- xor $6,$7,$7 # 6 cycles from $7 load
- xor $17,$18,$18 # 5 cycles from $18 load
-
- xor $1,$2,$2 # 9 cycles from $2 load
- xor $20,$21,$21 # 5 cycles from $21 load
- stq $2,0(%0)
- xor $4,$5,$5 # 6 cycles from $5 load
-
- stq $5,8(%0)
- xor $7,$16,$16 # 7 cycles from $16 load
- stq $16,16(%0)
- xor $18,$19,$19 # 7 cycles from $19 load
-
- stq $19,24(%0)
- xor $21,$22,$22 # 7 cycles from $22 load
- stq $22,32(%0)
- nop
-
- ldq $0,40(%0)
- ldq $1,40(%1)
- ldq $3,48(%0)
- ldq $4,48(%1)
-
- ldq $6,56(%0)
- ldq $7,56(%1)
- ldq $2,40(%2)
- ldq $5,48(%2)
-
- ldq $16,56(%2)
- ldq $31,256(%0)
- ldq $31,256(%1)
- ldq $31,256(%2)
-
- xor $0,$1,$1 # 6 cycles from $1 load
- xor $3,$4,$4 # 5 cycles from $4 load
- xor $6,$7,$7 # 5 cycles from $7 load
- xor $1,$2,$2 # 4 cycles from $2 load
-
- xor $4,$5,$5 # 5 cycles from $5 load
- xor $7,$16,$16 # 4 cycles from $16 load
- stq $2,40(%0)
- subq %3,1,%3
-
- stq $5,48(%0)
- addq %2,64,%2
- stq $16,56(%0)
- addq %1,64,%1
-
- addq %0,64,%0
- bgt %3,3b"
- : "=r"(d), "=r"(s1), "=r"(s2), "=r"(lines)
- : "0"(d), "1"(s1), "2"(s2), "3"(lines)
- : "memory", "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
- "$16", "$17", "$18", "$19", "$20", "$21", "$22");
- return;
-
-four_blocks:
-asm volatile ("
- .align 4
-4:
- ldq $0,0(%0)
- ldq $1,0(%1)
- ldq $2,0(%2)
- ldq $3,0(%3)
-
- ldq $4,8(%0)
- ldq $5,8(%1)
- ldq $6,8(%2)
- ldq $7,8(%3)
-
- ldq $16,16(%0)
- ldq $17,16(%1)
- ldq $18,16(%2)
- ldq $19,16(%3)
-
- ldq $20,24(%0)
- xor $0,$1,$1 # 6 cycles from $1 load
- ldq $21,24(%1)
- xor $2,$3,$3 # 6 cycles from $3 load
-
- ldq $0,24(%2)
- xor $1,$3,$3
- ldq $1,24(%3)
- xor $4,$5,$5 # 7 cycles from $5 load
-
- stq $3,0(%0)
- xor $6,$7,$7
- xor $16,$17,$17 # 7 cycles from $17 load
- xor $5,$7,$7
-
- stq $7,8(%0)
- xor $18,$19,$19 # 7 cycles from $19 load
- ldq $2,32(%0)
- xor $17,$19,$19
-
- ldq $3,32(%1)
- ldq $4,32(%2)
- ldq $5,32(%3)
- xor $20,$21,$21 # 8 cycles from $21 load
-
- ldq $6,40(%0)
- ldq $7,40(%1)
- ldq $16,40(%2)
- ldq $17,40(%3)
-
- stq $19,16(%0)
- xor $0,$1,$1 # 9 cycles from $1 load
- xor $2,$3,$3 # 5 cycles from $3 load
- xor $21,$1,$1
-
- ldq $18,48(%0)
- xor $4,$5,$5 # 5 cycles from $5 load
- ldq $19,48(%1)
- xor $3,$5,$5
-
- ldq $20,48(%2)
- ldq $21,48(%3)
- ldq $0,56(%0)
- ldq $1,56(%1)
-
- ldq $2,56(%2)
- xor $6,$7,$7 # 8 cycles from $6 load
- ldq $3,56(%3)
- xor $16,$17,$17 # 8 cycles from $17 load
-
- ldq $31,256(%0)
- xor $7,$17,$17
- ldq $31,256(%1)
- xor $18,$19,$19 # 6 cycles from $19 load
+/* Set of all registered templates. */
+static struct xor_block_template *template_list;
- ldq $31,256(%2)
- xor $20,$21,$21 # 6 cycles from $21 load
- ldq $31,256(%3)
- xor $19,$21,$21
+/* The -6*32 shift factor colors the cache. */
+#define BENCH_SIZE (PAGE_SIZE-6*32)
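
With 4 KB pages the coloring arithmetic works out as follows (illustrative,
assuming PAGE_SIZE == 4096):

    BENCH_SIZE = 4096 - 6*32 = 3904 bytes
    /* calibrate_xor_block() below places the second buffer at: */
    b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;    /* = b1 + 12096 */
    /* so b1 and b2 start 192 bytes apart modulo the page size and
       land on different L1 cache colors instead of aliasing. */
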
- stq $1,24(%0)
- xor $0,$1,$1 # 7 cycles from $1 load
- stq $5,32(%0)
- xor $2,$3,$3 # 6 cycles from $3 load
-
- stq $17,40(%0)
- xor $1,$3,$3
- stq $21,48(%0)
- subq %4,1,%4
-
- stq $3,56(%0)
- addq %3,64,%3
- addq %2,64,%2
- addq %1,64,%1
-
- addq %0,64,%0
- bgt %4,4b"
- : "=r"(d), "=r"(s1), "=r"(s2), "=r"(s3), "=r"(lines)
- : "0"(d), "1"(s1), "2"(s2), "3"(s3), "4"(lines)
- : "memory", "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
- "$16", "$17", "$18", "$19", "$20", "$21");
- return;
-
-five_blocks:
-asm volatile ("
- ldq %0,0(%6)
- ldq %1,8(%6)
- ldq %2,16(%6)
- ldq %3,24(%6)
- ldq %4,32(%6)
- ldq %0,%7(%0)
- ldq %1,%7(%1)
- ldq %2,%7(%2)
- ldq %3,%7(%3)
- ldq %4,%7(%4)
- .align 4
-5:
- ldq $0,0(%0)
- ldq $1,0(%1)
- ldq $2,0(%2)
- ldq $3,0(%3)
-
- ldq $4,0(%4)
- ldq $5,8(%0)
- ldq $6,8(%1)
- ldq $7,8(%2)
-
- ldq $16,8(%3)
- ldq $17,8(%4)
- ldq $18,16(%0)
- ldq $19,16(%1)
-
- ldq $20,16(%2)
- xor $0,$1,$1 # 6 cycles from $1 load
- ldq $21,16(%3)
- xor $2,$3,$3 # 6 cycles from $3 load
-
- ldq $0,16(%4)
- xor $1,$3,$3
- ldq $1,24(%0)
- xor $3,$4,$4 # 7 cycles from $4 load
-
- stq $4,0(%0)
- xor $5,$6,$6 # 7 cycles from $6 load
- xor $7,$16,$16 # 7 cycles from $16 load
- xor $6,$17,$17 # 7 cycles from $17 load
-
- ldq $2,24(%1)
- xor $16,$17,$17
- ldq $3,24(%2)
- xor $18,$19,$19 # 8 cycles from $19 load
-
- stq $17,8(%0)
- xor $19,$20,$20 # 8 cycles from $20 load
- ldq $4,24(%3)
- xor $21,$0,$0 # 7 cycles from $0 load
-
- ldq $5,24(%4)
- xor $20,$0,$0
- ldq $6,32(%0)
- ldq $7,32(%1)
-
- stq $0,16(%0)
- xor $1,$2,$2 # 6 cycles from $2 load
- ldq $16,32(%2)
- xor $3,$4,$4 # 4 cycles from $4 load
-
- ldq $17,32(%3)
- xor $2,$4,$4
- ldq $18,32(%4)
- ldq $19,40(%0)
-
- ldq $20,40(%1)
- ldq $21,40(%2)
- ldq $0,40(%3)
- xor $4,$5,$5 # 7 cycles from $5 load
-
- stq $5,24(%0)
- xor $6,$7,$7 # 7 cycles from $7 load
- ldq $1,40(%4)
- ldq $2,48(%0)
-
- ldq $3,48(%1)
- xor $7,$16,$16 # 7 cycles from $16 load
- ldq $4,48(%2)
- xor $17,$18,$18 # 6 cycles from $18 load
-
- ldq $5,48(%3)
- xor $16,$18,$18
- ldq $6,48(%4)
- xor $19,$20,$20 # 7 cycles from $20 load
-
- stq $18,32(%0)
- xor $20,$21,$21 # 8 cycles from $21 load
- ldq $7,56(%0)
- xor $0,$1,$1 # 6 cycles from $1 load
-
- ldq $16,56(%1)
- ldq $17,56(%2)
- ldq $18,56(%3)
- ldq $19,56(%4)
-
- ldq $31,256(%0)
- xor $21,$1,$1
- ldq $31,256(%1)
- xor $2,$3,$3 # 9 cycles from $3 load
-
- ldq $31,256(%2)
- xor $3,$4,$4 # 9 cycles from $4 load
- ldq $31,256(%3)
- xor $5,$6,$6 # 8 cycles from $6 load
-
- ldq $31,256(%4)
- xor $4,$6,$6
- xor $7,$16,$16 # 7 cycles from $16 load
- xor $17,$18,$18 # 6 cycles from $18 load
-
- stq $6,48(%0)
- xor $16,$18,$18
- subq %5,1,%5
- xor $18,$19,$19 # 8 cycles from $19 load
-
- stq $19,56(%0)
- addq %4,64,%4
- addq %3,64,%3
- addq %2,64,%2
-
- addq %1,64,%1
- addq %0,64,%0
- bgt %5,5b"
- : "=&r"(d), "=&r"(s1), "=&r"(s2), "=&r"(s3), "=r"(s4), "=r"(lines)
- /* ARG! We've run out of asm arguments! We've got to reload
- all those pointers we just loaded. */
- : "r"(bh_ptr), "i" (&((struct buffer_head *)0)->b_data), "5"(lines)
- : "memory", "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7",
- "$16", "$17", "$18", "$19", "$20", "$21");
- return;
-}
-
-#undef prefetch
-
-#endif /* __alpha__ */
-
-#ifndef __sparc_v9__
-
-/*
- * This one works reasonably well on any x86 CPU.
- * (Send me an assembly version for inclusion if you can make it faster.)
- *
- * It is just as fast as a version written in pure assembly on x86.
- * The reason for this separate version is that the fast open-coded
- * xor routine "32regs" produces suboptimal code on x86, due to the
- * lack of registers.
- */
-XORBLOCK_TEMPLATE(8regs)
-{
- int len = bh_ptr[0]->b_size;
- long *destp = (long *) bh_ptr[0]->b_data;
- long *source1, *source2, *source3, *source4;
- long lines = len / (sizeof (long)) / 8, i;
-
- switch(count) {
- case 2:
- source1 = (long *) bh_ptr[1]->b_data;
- for (i = lines; i > 0; i--) {
- *(destp + 0) ^= *(source1 + 0);
- *(destp + 1) ^= *(source1 + 1);
- *(destp + 2) ^= *(source1 + 2);
- *(destp + 3) ^= *(source1 + 3);
- *(destp + 4) ^= *(source1 + 4);
- *(destp + 5) ^= *(source1 + 5);
- *(destp + 6) ^= *(source1 + 6);
- *(destp + 7) ^= *(source1 + 7);
- source1 += 8;
- destp += 8;
- }
- break;
- case 3:
- source2 = (long *) bh_ptr[2]->b_data;
- source1 = (long *) bh_ptr[1]->b_data;
- for (i = lines; i > 0; i--) {
- *(destp + 0) ^= *(source1 + 0);
- *(destp + 0) ^= *(source2 + 0);
- *(destp + 1) ^= *(source1 + 1);
- *(destp + 1) ^= *(source2 + 1);
- *(destp + 2) ^= *(source1 + 2);
- *(destp + 2) ^= *(source2 + 2);
- *(destp + 3) ^= *(source1 + 3);
- *(destp + 3) ^= *(source2 + 3);
- *(destp + 4) ^= *(source1 + 4);
- *(destp + 4) ^= *(source2 + 4);
- *(destp + 5) ^= *(source1 + 5);
- *(destp + 5) ^= *(source2 + 5);
- *(destp + 6) ^= *(source1 + 6);
- *(destp + 6) ^= *(source2 + 6);
- *(destp + 7) ^= *(source1 + 7);
- *(destp + 7) ^= *(source2 + 7);
- source1 += 8;
- source2 += 8;
- destp += 8;
- }
- break;
- case 4:
- source3 = (long *) bh_ptr[3]->b_data;
- source2 = (long *) bh_ptr[2]->b_data;
- source1 = (long *) bh_ptr[1]->b_data;
- for (i = lines; i > 0; i--) {
- *(destp + 0) ^= *(source1 + 0);
- *(destp + 0) ^= *(source2 + 0);
- *(destp + 0) ^= *(source3 + 0);
- *(destp + 1) ^= *(source1 + 1);
- *(destp + 1) ^= *(source2 + 1);
- *(destp + 1) ^= *(source3 + 1);
- *(destp + 2) ^= *(source1 + 2);
- *(destp + 2) ^= *(source2 + 2);
- *(destp + 2) ^= *(source3 + 2);
- *(destp + 3) ^= *(source1 + 3);
- *(destp + 3) ^= *(source2 + 3);
- *(destp + 3) ^= *(source3 + 3);
- *(destp + 4) ^= *(source1 + 4);
- *(destp + 4) ^= *(source2 + 4);
- *(destp + 4) ^= *(source3 + 4);
- *(destp + 5) ^= *(source1 + 5);
- *(destp + 5) ^= *(source2 + 5);
- *(destp + 5) ^= *(source3 + 5);
- *(destp + 6) ^= *(source1 + 6);
- *(destp + 6) ^= *(source2 + 6);
- *(destp + 6) ^= *(source3 + 6);
- *(destp + 7) ^= *(source1 + 7);
- *(destp + 7) ^= *(source2 + 7);
- *(destp + 7) ^= *(source3 + 7);
- source1 += 8;
- source2 += 8;
- source3 += 8;
- destp += 8;
- }
- break;
- case 5:
- source4 = (long *) bh_ptr[4]->b_data;
- source3 = (long *) bh_ptr[3]->b_data;
- source2 = (long *) bh_ptr[2]->b_data;
- source1 = (long *) bh_ptr[1]->b_data;
- for (i = lines; i > 0; i--) {
- *(destp + 0) ^= *(source1 + 0);
- *(destp + 0) ^= *(source2 + 0);
- *(destp + 0) ^= *(source3 + 0);
- *(destp + 0) ^= *(source4 + 0);
- *(destp + 1) ^= *(source1 + 1);
- *(destp + 1) ^= *(source2 + 1);
- *(destp + 1) ^= *(source3 + 1);
- *(destp + 1) ^= *(source4 + 1);
- *(destp + 2) ^= *(source1 + 2);
- *(destp + 2) ^= *(source2 + 2);
- *(destp + 2) ^= *(source3 + 2);
- *(destp + 2) ^= *(source4 + 2);
- *(destp + 3) ^= *(source1 + 3);
- *(destp + 3) ^= *(source2 + 3);
- *(destp + 3) ^= *(source3 + 3);
- *(destp + 3) ^= *(source4 + 3);
- *(destp + 4) ^= *(source1 + 4);
- *(destp + 4) ^= *(source2 + 4);
- *(destp + 4) ^= *(source3 + 4);
- *(destp + 4) ^= *(source4 + 4);
- *(destp + 5) ^= *(source1 + 5);
- *(destp + 5) ^= *(source2 + 5);
- *(destp + 5) ^= *(source3 + 5);
- *(destp + 5) ^= *(source4 + 5);
- *(destp + 6) ^= *(source1 + 6);
- *(destp + 6) ^= *(source2 + 6);
- *(destp + 6) ^= *(source3 + 6);
- *(destp + 6) ^= *(source4 + 6);
- *(destp + 7) ^= *(source1 + 7);
- *(destp + 7) ^= *(source2 + 7);
- *(destp + 7) ^= *(source3 + 7);
- *(destp + 7) ^= *(source4 + 7);
- source1 += 8;
- source2 += 8;
- source3 += 8;
- source4 += 8;
- destp += 8;
- }
- break;
- }
-}
-
-/*
- * platform independent RAID5 checksum calculation, this should
- * be very fast on any platform that has a decent amount of
- * registers. (32 or more)
- */
-XORBLOCK_TEMPLATE(32regs)
-{
- int size = bh_ptr[0]->b_size;
- int lines = size / (sizeof (long)) / 8, i;
- long *destp = (long *) bh_ptr[0]->b_data;
- long *source1, *source2, *source3, *source4;
-
-	/* LOTS of registers available...
-	   We do explicit loop-unrolling here for code which
-	   favours RISC machines. In fact this is almost direct
-	   RISC assembly on Alpha and SPARC :-) */
-
-
- switch(count) {
- case 2:
- source1 = (long *) bh_ptr[1]->b_data;
- for (i = lines; i > 0; i--) {
- register long d0, d1, d2, d3, d4, d5, d6, d7;
- d0 = destp[0]; /* Pull the stuff into registers */
- d1 = destp[1]; /* ... in bursts, if possible. */
- d2 = destp[2];
- d3 = destp[3];
- d4 = destp[4];
- d5 = destp[5];
- d6 = destp[6];
- d7 = destp[7];
- d0 ^= source1[0];
- d1 ^= source1[1];
- d2 ^= source1[2];
- d3 ^= source1[3];
- d4 ^= source1[4];
- d5 ^= source1[5];
- d6 ^= source1[6];
- d7 ^= source1[7];
-			destp[0] = d0;	/* Store the result (in bursts) */
-			destp[1] = d1;
-			destp[2] = d2;
-			destp[3] = d3;
-			destp[4] = d4;	/* Store the result (in bursts) */
- destp[5] = d5;
- destp[6] = d6;
- destp[7] = d7;
- source1 += 8;
- destp += 8;
- }
- break;
- case 3:
- source2 = (long *) bh_ptr[2]->b_data;
- source1 = (long *) bh_ptr[1]->b_data;
- for (i = lines; i > 0; i--) {
- register long d0, d1, d2, d3, d4, d5, d6, d7;
- d0 = destp[0]; /* Pull the stuff into registers */
- d1 = destp[1]; /* ... in bursts, if possible. */
- d2 = destp[2];
- d3 = destp[3];
- d4 = destp[4];
- d5 = destp[5];
- d6 = destp[6];
- d7 = destp[7];
- d0 ^= source1[0];
- d1 ^= source1[1];
- d2 ^= source1[2];
- d3 ^= source1[3];
- d4 ^= source1[4];
- d5 ^= source1[5];
- d6 ^= source1[6];
- d7 ^= source1[7];
- d0 ^= source2[0];
- d1 ^= source2[1];
- d2 ^= source2[2];
- d3 ^= source2[3];
- d4 ^= source2[4];
- d5 ^= source2[5];
- d6 ^= source2[6];
- d7 ^= source2[7];
-			destp[0] = d0;	/* Store the result (in bursts) */
-			destp[1] = d1;
-			destp[2] = d2;
-			destp[3] = d3;
-			destp[4] = d4;	/* Store the result (in bursts) */
- destp[5] = d5;
- destp[6] = d6;
- destp[7] = d7;
- source1 += 8;
- source2 += 8;
- destp += 8;
- }
- break;
- case 4:
- source3 = (long *) bh_ptr[3]->b_data;
- source2 = (long *) bh_ptr[2]->b_data;
- source1 = (long *) bh_ptr[1]->b_data;
- for (i = lines; i > 0; i--) {
- register long d0, d1, d2, d3, d4, d5, d6, d7;
- d0 = destp[0]; /* Pull the stuff into registers */
- d1 = destp[1]; /* ... in bursts, if possible. */
- d2 = destp[2];
- d3 = destp[3];
- d4 = destp[4];
- d5 = destp[5];
- d6 = destp[6];
- d7 = destp[7];
- d0 ^= source1[0];
- d1 ^= source1[1];
- d2 ^= source1[2];
- d3 ^= source1[3];
- d4 ^= source1[4];
- d5 ^= source1[5];
- d6 ^= source1[6];
- d7 ^= source1[7];
- d0 ^= source2[0];
- d1 ^= source2[1];
- d2 ^= source2[2];
- d3 ^= source2[3];
- d4 ^= source2[4];
- d5 ^= source2[5];
- d6 ^= source2[6];
- d7 ^= source2[7];
- d0 ^= source3[0];
- d1 ^= source3[1];
- d2 ^= source3[2];
- d3 ^= source3[3];
- d4 ^= source3[4];
- d5 ^= source3[5];
- d6 ^= source3[6];
- d7 ^= source3[7];
-			destp[0] = d0;	/* Store the result (in bursts) */
-			destp[1] = d1;
-			destp[2] = d2;
-			destp[3] = d3;
-			destp[4] = d4;	/* Store the result (in bursts) */
- destp[5] = d5;
- destp[6] = d6;
- destp[7] = d7;
- source1 += 8;
- source2 += 8;
- source3 += 8;
- destp += 8;
- }
- break;
- case 5:
- source4 = (long *) bh_ptr[4]->b_data;
- source3 = (long *) bh_ptr[3]->b_data;
- source2 = (long *) bh_ptr[2]->b_data;
- source1 = (long *) bh_ptr[1]->b_data;
- for (i = lines; i > 0; i--) {
- register long d0, d1, d2, d3, d4, d5, d6, d7;
- d0 = destp[0]; /* Pull the stuff into registers */
- d1 = destp[1]; /* ... in bursts, if possible. */
- d2 = destp[2];
- d3 = destp[3];
- d4 = destp[4];
- d5 = destp[5];
- d6 = destp[6];
- d7 = destp[7];
- d0 ^= source1[0];
- d1 ^= source1[1];
- d2 ^= source1[2];
- d3 ^= source1[3];
- d4 ^= source1[4];
- d5 ^= source1[5];
- d6 ^= source1[6];
- d7 ^= source1[7];
- d0 ^= source2[0];
- d1 ^= source2[1];
- d2 ^= source2[2];
- d3 ^= source2[3];
- d4 ^= source2[4];
- d5 ^= source2[5];
- d6 ^= source2[6];
- d7 ^= source2[7];
- d0 ^= source3[0];
- d1 ^= source3[1];
- d2 ^= source3[2];
- d3 ^= source3[3];
- d4 ^= source3[4];
- d5 ^= source3[5];
- d6 ^= source3[6];
- d7 ^= source3[7];
- d0 ^= source4[0];
- d1 ^= source4[1];
- d2 ^= source4[2];
- d3 ^= source4[3];
- d4 ^= source4[4];
- d5 ^= source4[5];
- d6 ^= source4[6];
- d7 ^= source4[7];
-			destp[0] = d0;	/* Store the result (in bursts) */
-			destp[1] = d1;
-			destp[2] = d2;
-			destp[3] = d3;
-			destp[4] = d4;	/* Store the result (in bursts) */
- destp[5] = d5;
- destp[6] = d6;
- destp[7] = d7;
- source1 += 8;
- source2 += 8;
- source3 += 8;
- source4 += 8;
- destp += 8;
- }
- break;
- }
-}
-
-/*
- * (the -6*32 shift factor colors the cache)
- */
-#define SIZE (PAGE_SIZE-6*32)
-
-static void xor_speed ( struct xor_block_template * func,
- struct buffer_head *b1, struct buffer_head *b2)
+static void
+do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
{
int speed;
unsigned long now;
int i, count, max;
- struct buffer_head *bh_ptr[6];
- func->next = xor_functions;
- xor_functions = func;
- bh_ptr[0] = b1;
- bh_ptr[1] = b2;
+ tmpl->next = template_list;
+ template_list = tmpl;
/*
- * count the number of XORs done during a whole jiffy.
- * calculate the speed of checksumming from this.
- * (we use a 2-page allocation to have guaranteed
- * color L1-cache layout)
+ * Count the number of XORs done during a whole jiffy, and use
+ * this to calculate the speed of checksumming. We use a 2-page
+ * allocation to have guaranteed color L1-cache layout.
*/
max = 0;
for (i = 0; i < 5; i++) {
@@ -2600,7 +82,7 @@ static void xor_speed ( struct xor_block_template * func,
count = 0;
while (jiffies == now) {
mb();
- func->xor_block(2,bh_ptr);
+ tmpl->do_2(BENCH_SIZE, b1, b2);
mb();
count++;
mb();
@@ -2609,120 +91,53 @@ static void xor_speed ( struct xor_block_template * func,
max = count;
}
- speed = max * (HZ*SIZE/1024);
- func->speed = speed;
+ speed = max * (HZ * BENCH_SIZE / 1024);
+ tmpl->speed = speed;
- printk( " %-10s: %5d.%03d MB/sec\n", func->name,
- speed / 1000, speed % 1000);
+ printk(" %-10s: %5d.%03d MB/sec\n", tmpl->name,
+ speed / 1000, speed % 1000);
}
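
The units in the speed figure fall out of the loop structure: max is the
best iteration count observed in a single jiffy, so max * HZ approximates
iterations per second, and each iteration XORs BENCH_SIZE bytes. A worked
example with assumed values (HZ == 100, PAGE_SIZE == 4096, hence
BENCH_SIZE == 3904, and a hypothetical max of 2000):

    speed = max * (HZ * BENCH_SIZE / 1024);
          /* = 2000 * (100 * 3904 / 1024)          */
          /* = 2000 * 381  (integer division)      */
          /* = 762000, printed as "762.000 MB/sec" */
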
-static inline void pick_fastest_function(void)
+static int
+calibrate_xor_block(void)
{
+ void *b1, *b2;
struct xor_block_template *f, *fastest;
- fastest = xor_functions;
- for (f = fastest; f; f = f->next) {
- if (f->speed > fastest->speed)
- fastest = f;
- }
-#ifdef CONFIG_X86_XMM
- if (cpu_has_xmm) {
- /* we force the use of the KNI xor block because it
- can write around l2. we may also be able
- to load into the l1 only depending on how
- the cpu deals with a load to a line that is
- being prefetched.
- */
- fastest = &t_xor_block_pIII_kni;
+ b1 = (void *) md__get_free_pages(GFP_KERNEL, 2);
+ if (! b1) {
+ printk("raid5: Yikes! No memory available.\n");
+ return -ENOMEM;
}
-#endif
-#ifdef __alpha__
- if (implver() == IMPLVER_EV6) {
- /* Force the use of alpha_prefetch if EV6, as it
- is significantly faster in the cold cache case. */
- fastest = &t_xor_block_alpha_prefetch;
- }
-#endif
- xor_block = fastest->xor_block;
- printk( "using fastest function: %s (%d.%03d MB/sec)\n", fastest->name,
- fastest->speed / 1000, fastest->speed % 1000);
-}
-
-static struct buffer_head b1, b2;
-
-void calibrate_xor_block(void)
-{
- if (xor_block)
- return;
- memset(&b1,0,sizeof(b1));
- b2 = b1;
-
- b1.b_data = (char *) md__get_free_pages(GFP_KERNEL,2);
- if (!b1.b_data) {
- pick_fastest_function();
- return;
- }
- b2.b_data = b1.b_data + 2*PAGE_SIZE + SIZE;
-
- b1.b_size = SIZE;
+ b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
printk(KERN_INFO "raid5: measuring checksumming speed\n");
+ sti();
- sti(); /* should be safe */
+#define xor_speed(templ) do_xor_speed((templ), b1, b2)
-#if defined(__sparc__) && !defined(__sparc_v9__)
- printk(KERN_INFO "raid5: trying high-speed SPARC checksum routine\n");
- xor_speed(&t_xor_block_SPARC,&b1,&b2);
-#endif
+ XOR_TRY_TEMPLATES;
-#ifdef CONFIG_X86_XMM
- if (cpu_has_xmm) {
- printk(KERN_INFO
- "raid5: KNI detected, trying cache-avoiding KNI checksum routine\n");
- xor_speed(&t_xor_block_pIII_kni,&b1,&b2);
- }
-#endif /* CONFIG_X86_XMM */
+#undef xor_speed
-#ifdef __i386__
- if (md_cpu_has_mmx()) {
- printk(KERN_INFO
- "raid5: MMX detected, trying high-speed MMX checksum routines\n");
- xor_speed(&t_xor_block_pII_mmx,&b1,&b2);
- xor_speed(&t_xor_block_p5_mmx,&b1,&b2);
- }
-#endif /* __i386__ */
+ free_pages((unsigned long)b1, 2);
-#ifdef __alpha__
- xor_speed(&t_xor_block_alpha,&b1,&b2);
- xor_speed(&t_xor_block_alpha_prefetch,&b1,&b2);
-#endif
-
- xor_speed(&t_xor_block_8regs,&b1,&b2);
- xor_speed(&t_xor_block_32regs,&b1,&b2);
+ fastest = template_list;
+ for (f = fastest; f; f = f->next)
+ if (f->speed > fastest->speed)
+ fastest = f;
- free_pages((unsigned long)b1.b_data,2);
- pick_fastest_function();
-}
+#ifdef XOR_SELECT_TEMPLATE
+ fastest = XOR_SELECT_TEMPLATE(fastest);
+#endif
-#else /* __sparc_v9__ */
+ active_template = fastest;
+ printk("raid5: using function: %s (%d.%03d MB/sec)\n",
+ fastest->name, fastest->speed / 1000, fastest->speed % 1000);
-void calibrate_xor_block(void)
-{
- if (xor_block)
- return;
- printk(KERN_INFO "raid5: using high-speed VIS checksum routine\n");
- xor_block = xor_block_VIS;
+ return 0;
}
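
XOR_TRY_TEMPLATES and the optional XOR_SELECT_TEMPLATE hook are supplied by
each architecture's <asm/xor.h>, which is where the per-CPU routines deleted
from this file now live. A minimal sketch of such a header, assuming generic
8regs/32regs templates like the ones removed above (names illustrative):

    /* <asm/xor.h>, sketched: register the generic templates ... */
    #define XOR_TRY_TEMPLATES               \
        do {                                \
            xor_speed(&xor_block_8regs);    \
            xor_speed(&xor_block_32regs);   \
        } while (0)

    /* ... and optionally overrule the hot-cache benchmark, e.g. to
       force a cache-avoiding SSE routine (assumed name): */
    #define XOR_SELECT_TEMPLATE(fastest) \
        (cpu_has_xmm ? &xor_block_pIII_sse : (fastest))

This mirrors the old pick_fastest_function() special cases for KNI and EV6,
but keeps the selection policy with the architecture instead of in generic
code.
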
-#endif /* __sparc_v9__ */
-
MD_EXPORT_SYMBOL(xor_block);
-MD_EXPORT_SYMBOL(calibrate_xor_block);
-#ifdef MODULE
-int init_module(void)
-{
- calibrate_xor_block();
- return 0;
-}
-#endif
+module_init(calibrate_xor_block);
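
After module_init() runs the calibration, callers such as the RAID5 driver
see only the single entry point: bh_ptr[0] is both the first source and the
destination, the remaining count-1 entries are sources, and all buffers
share the same b_size. A call-site sketch (buffer names illustrative):

    /* XOR three parity sources into dest, in place: */
    struct buffer_head *bh_ptr[4] = { dest, src1, src2, src3 };
    xor_block(4, bh_ptr);    /* dest->b_data ^= src1 ^ src2 ^ src3 */
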