author    Ralf Baechle <ralf@linux-mips.org>  2000-11-28 03:58:46 +0000
committer Ralf Baechle <ralf@linux-mips.org>  2000-11-28 03:58:46 +0000
commit    b63ad0882a16a5d28003e57f2b0b81dee3fb322b (patch)
tree      0a343ce219e2b8b38a5d702d66032c57b83d9720 /include/asm-i386
parent    a9d7bff9a84dba79609a0002e5321b74c4d64c64 (diff)

Merge with 2.4.0-test11.
Diffstat (limited to 'include/asm-i386')
-rw-r--r--  include/asm-i386/bugs.h        | 204
-rw-r--r--  include/asm-i386/cpufeature.h  |  73
-rw-r--r--  include/asm-i386/elf.h         |   6
-rw-r--r--  include/asm-i386/highmem.h     |  17
-rw-r--r--  include/asm-i386/i387.h        |   1
-rw-r--r--  include/asm-i386/module.h      |  11
-rw-r--r--  include/asm-i386/pgtable.h     |   3
-rw-r--r--  include/asm-i386/processor.h   | 135
-rw-r--r--  include/asm-i386/xor.h         | 858
9 files changed, 1040 insertions(+), 268 deletions(-)
diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h
index 4b95d09cd..4e77e5d8a 100644
--- a/include/asm-i386/bugs.h
+++ b/include/asm-i386/bugs.h
@@ -147,200 +147,6 @@ static void __init check_popad(void)
}
/*
- * B stepping AMD K6 before B 9730xxxx have hardware bugs that can cause
- * misexecution of code under Linux. Owners of such processors should
- * contact AMD for precise details and a CPU swap.
- *
- * See http://www.mygale.com/~poulot/k6bug.html
- * http://www.amd.com/K6/k6docs/revgd.html
- *
- * The following test is erm.. interesting. AMD neglected to up
- * the chip stepping when fixing the bug but they also tweaked some
- * performance at the same time..
- */
-
-extern void vide(void);
-__asm__(".align 4\nvide: ret");
-
-static void __init check_amd_k6(void)
-{
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 5 &&
- boot_cpu_data.x86_model == 6 &&
- boot_cpu_data.x86_mask == 1)
- {
- int n;
- void (*f_vide)(void);
- unsigned long d, d2;
-
- printk(KERN_INFO "AMD K6 stepping B detected - ");
-
-#define K6_BUG_LOOP 1000000
-
- /*
- * It looks like AMD fixed the 2.6.2 bug and improved indirect
- * calls at the same time.
- */
-
- n = K6_BUG_LOOP;
- f_vide = vide;
- rdtscl(d);
- while (n--)
- f_vide();
- rdtscl(d2);
- d = d2-d;
-
- /* Knock these two lines out if it debugs out ok */
- printk(KERN_INFO "K6 BUG %ld %d (Report these if test report is incorrect)\n", d, 20*K6_BUG_LOOP);
- printk(KERN_INFO "AMD K6 stepping B detected - ");
- /* -- cut here -- */
- if (d > 20*K6_BUG_LOOP)
- printk("system stability may be impaired when more than 32 MB are used.\n");
- else
- printk("probably OK (after B9730xxxx).\n");
- printk(KERN_INFO "Please see http://www.mygale.com/~poulot/k6bug.html\n");
- }
-}
-
-/*
- * All current models of Pentium and Pentium with MMX technology CPUs
- * have the F0 0F bug, which lets nonprivileged users lock up the system:
- */
-
-#ifndef CONFIG_M686
-extern void trap_init_f00f_bug(void);
-
-static void __init check_pentium_f00f(void)
-{
- /*
- * Pentium and Pentium MMX
- */
- boot_cpu_data.f00f_bug = 0;
- if (boot_cpu_data.x86 == 5 && boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
- printk(KERN_INFO "Intel Pentium with F0 0F bug - workaround enabled.\n");
- boot_cpu_data.f00f_bug = 1;
- trap_init_f00f_bug();
- }
-}
-#endif
-
-/*
- * Perform the Cyrix 5/2 test. A Cyrix won't change
- * the flags, while other 486 chips will.
- */
-
-static inline int test_cyrix_52div(void)
-{
- unsigned int test;
-
- __asm__ __volatile__(
- "sahf\n\t" /* clear flags (%eax = 0x0005) */
- "div %b2\n\t" /* divide 5 by 2 */
- "lahf" /* store flags into %ah */
- : "=a" (test)
- : "0" (5), "q" (2)
- : "cc");
-
- /* AH is 0x02 on Cyrix after the divide.. */
- return (unsigned char) (test >> 8) == 0x02;
-}
-
-/*
- * Fix cpuid problems with Cyrix CPUs:
- * -- on the Cx686(L) the cpuid is disabled on power up.
- * -- braindamaged BIOSes disable cpuid on the Cx686MX.
- */
-
-extern unsigned char Cx86_dir0_msb; /* exported HACK from cyrix_model() */
-
-static void __init check_cx686_cpuid(void)
-{
- if (boot_cpu_data.cpuid_level == -1 &&
- ((Cx86_dir0_msb == 5) || (Cx86_dir0_msb == 3))) {
- int eax, dummy;
- unsigned char ccr3, ccr4;
- __u32 old_cap;
-
- cli();
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- ccr4 = getCx86(CX86_CCR4);
- setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
- sti();
-
- /* we have up to level 1 available on the Cx6x86(L|MX) */
- boot_cpu_data.cpuid_level = 1;
- /* Need to preserve some externally computed capabilities */
- old_cap = boot_cpu_data.x86_capability & X86_FEATURE_MTRR;
- cpuid(1, &eax, &dummy, &dummy,
- &boot_cpu_data.x86_capability);
- boot_cpu_data.x86_capability |= old_cap;
-
- boot_cpu_data.x86 = (eax >> 8) & 15;
- /*
- * we already have a cooked step/rev number from DIR1
- * so we don't use the cpuid-provided ones.
- */
- }
-}
-
-/*
- * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
- * BIOSes for compatibility with DOS games. This makes the udelay loop
- * work correctly, and improves performance.
- */
-
-extern void calibrate_delay(void) __init;
-
-static void __init check_cx686_slop(void)
-{
- if (Cx86_dir0_msb == 3) {
- unsigned char ccr3, ccr5;
-
- cli();
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- ccr5 = getCx86(CX86_CCR5);
- if (ccr5 & 2)
- setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
- sti();
-
- if (ccr5 & 2) { /* possible wrong calibration done */
- printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
- calibrate_delay();
- boot_cpu_data.loops_per_sec = loops_per_sec;
- }
- }
-}
-
-/*
- * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected
- * by the fact that they preserve the flags across the division of 5/2.
- * PII and PPro exhibit this behavior too, but they have cpuid available.
- */
-
-static void __init check_cyrix_cpu(void)
-{
- if ((boot_cpu_data.cpuid_level == -1) && (boot_cpu_data.x86 == 4)
- && test_cyrix_52div()) {
-
- strcpy(boot_cpu_data.x86_vendor_id, "CyrixInstead");
- }
-}
-
-/*
- * In setup.c's cyrix_model() we have set the boot_cpu_data.coma_bug
- * on certain processors that we know contain this bug and now we
- * enable the workaround for it.
- */
-
-static void __init check_cyrix_coma(void)
-{
-}
-
-/*
* Check whether we are able to run this kernel safely on SMP.
*
* - In order to run on a i386, we need to be compiled for i386
@@ -391,7 +197,7 @@ static void __init check_config(void)
*/
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
- && boot_cpu_data.x86_capability & X86_FEATURE_APIC
+ && test_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability)
&& boot_cpu_data.x86 == 5
&& boot_cpu_data.x86_model == 2
&& (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
@@ -409,10 +215,7 @@ static void __init check_config(void)
static void __init check_bugs(void)
{
- check_cyrix_cpu();
identify_cpu(&boot_cpu_data);
- check_cx686_cpuid();
- check_cx686_slop();
#ifndef CONFIG_SMP
printk("CPU: ");
print_cpu_info(&boot_cpu_data);
@@ -421,10 +224,5 @@ static void __init check_bugs(void)
check_fpu();
check_hlt();
check_popad();
- check_amd_k6();
-#ifndef CONFIG_M686
- check_pentium_f00f();
-#endif
- check_cyrix_coma();
system_utsname.machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
}
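
The conversion just above, from "boot_cpu_data.x86_capability & X86_FEATURE_APIC"
to "test_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability)", is the core idea
of this patch: feature flags move from a single 32-bit mask word to bit numbers
indexed into an array of words. A minimal userspace model of the new scheme;
model_test_bit is an illustrative stand-in for the kernel's test_bit, not
kernel code:

    #include <stdio.h>

    #define NCAPINTS 4                       /* four 32-bit capability words */
    #define X86_FEATURE_APIC  (0*32 +  9)    /* word 0, bit 9 */
    #define X86_FEATURE_3DNOW (1*32 + 31)    /* word 1, bit 31 */

    /* Pick the word, then the bit, as the kernel's test_bit does. */
    static int model_test_bit(int nr, const unsigned int *addr)
    {
        return (addr[nr / 32] >> (nr % 32)) & 1;
    }

    int main(void)
    {
        unsigned int x86_capability[NCAPINTS] = { 0 };

        x86_capability[0] |= 1u << 9;    /* pretend CPUID reported an APIC */
        printf("APIC:  %d\n", model_test_bit(X86_FEATURE_APIC, x86_capability));
        printf("3DNow: %d\n", model_test_bit(X86_FEATURE_3DNOW, x86_capability));
        return 0;
    }

The old single-word scheme could not have expressed X86_FEATURE_3DNOW at all;
it lives in the AMD-defined word added by the new cpufeature.h below.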
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
new file mode 100644
index 000000000..598edbdaf
--- /dev/null
+++ b/include/asm-i386/cpufeature.h
@@ -0,0 +1,73 @@
+/*
+ * cpufeature.h
+ *
+ * Defines x86 CPU feature bits
+ */
+
+#ifndef __ASM_I386_CPUFEATURE_H
+#define __ASM_I386_CPUFEATURE_H
+
+/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */
+#define CPU_FEATURE_P(CAP, FEATURE) test_bit(X86_FEATURE_##FEATURE, CAP)
+
+#define NCAPINTS 4 /* Currently we have 4 32-bit words worth of info */
+
+/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */
+#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */
+#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
+ /* of FPU context), and CR4.OSFXSR available */
+#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
+#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */
+#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
+
+#endif /* __ASM_I386_CPUFEATURE_H */
+
+/*
+ * Local Variables:
+ * mode:c
+ * comment-column:42
+ * End:
+ */
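
Each capability word is filled from a different CPUID leaf: 0x00000001 for
word 0, 0x80000001 for word 1, 0x80860001 for word 2, and word 3 is
synthesized by Linux itself. A userspace sketch of filling word 0 and testing
bits against the constants above, using GCC's <cpuid.h> helper purely for
illustration (the kernel uses its own cpuid() wrapper from processor.h):

    #include <stdio.h>
    #include <cpuid.h>

    #define X86_FEATURE_TSC (0*32 +  4)
    #define X86_FEATURE_MMX (0*32 + 23)

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;
        unsigned int cap[4] = { 0 };

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return 1;                /* CPUID leaf 1 not available */
        cap[0] = edx;                /* the Intel-defined feature word */

        printf("TSC: %u\n", (cap[X86_FEATURE_TSC / 32] >> (X86_FEATURE_TSC % 32)) & 1);
        printf("MMX: %u\n", (cap[X86_FEATURE_MMX / 32] >> (X86_FEATURE_MMX % 32)) & 1);
        return 0;
    }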
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index 55ffacff1..c8d826232 100644
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -8,6 +8,8 @@
#include <asm/ptrace.h>
#include <asm/user.h>
+#include <linux/utsname.h>
+
typedef unsigned long elf_greg_t;
#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
@@ -84,7 +86,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
instruction set this CPU supports. This could be done in user space,
but it's not easy, and we've already done it here. */
-#define ELF_HWCAP (boot_cpu_data.x86_capability)
+#define ELF_HWCAP (boot_cpu_data.x86_capability[0])
/* This yields a string that ld.so will use to load implementation
specific libraries for optimization. This is more specific in
@@ -93,7 +95,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
For the moment, we have only optimizations for the Intel generations,
but that could change... */
-#define ELF_PLATFORM ("i386\0i486\0i586\0i686"+(((boot_cpu_data.x86>6?6:boot_cpu_data.x86)-3)*5))
+#define ELF_PLATFORM (system_utsname.machine)
#ifdef __KERNEL__
#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX)
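
ELF_HWCAP and ELF_PLATFORM reach every ELF program through the auxiliary
vector, as AT_HWCAP and AT_PLATFORM; that is how ld.so picks optimized library
directories. With this hunk the platform string becomes whatever check_bugs()
stored in system_utsname.machine rather than an offset into a string literal.
A userspace sketch that reads both values back, using glibc's much later
getauxval() API purely for illustration:

    #include <stdio.h>
    #include <sys/auxv.h>

    int main(void)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);   /* capability word 0 */
        const char *plat = (const char *)getauxval(AT_PLATFORM);

        printf("AT_HWCAP    = %#lx\n", hwcap);
        printf("AT_PLATFORM = %s\n", plat ? plat : "(none)");  /* e.g. "i686" */
        return 0;
    }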
diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h
index 8370b7eb6..dfff7fad0 100644
--- a/include/asm-i386/highmem.h
+++ b/include/asm-i386/highmem.h
@@ -53,19 +53,19 @@ extern void kmap_init(void) __init;
#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT)
#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
-extern unsigned long FASTCALL(kmap_high(struct page *page));
+extern void * FASTCALL(kmap_high(struct page *page));
extern void FASTCALL(kunmap_high(struct page *page));
-extern inline unsigned long kmap(struct page *page)
+static inline void *kmap(struct page *page)
{
if (in_interrupt())
BUG();
if (page < highmem_start_page)
- return (unsigned long) page_address(page);
+ return page_address(page);
return kmap_high(page);
}
-extern inline void kunmap(struct page *page)
+static inline void kunmap(struct page *page)
{
if (in_interrupt())
BUG();
@@ -80,13 +80,13 @@ extern inline void kunmap(struct page *page)
* be used in IRQ contexts, so in some (very limited) cases we need
* it.
*/
-extern inline unsigned long kmap_atomic(struct page *page, enum km_type type)
+static inline void *kmap_atomic(struct page *page, enum km_type type)
{
enum fixed_addresses idx;
unsigned long vaddr;
if (page < highmem_start_page)
- return (unsigned long) page_address(page);
+ return page_address(page);
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
@@ -97,12 +97,13 @@ extern inline unsigned long kmap_atomic(struct page *page, enum km_type type)
set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
__flush_tlb_one(vaddr);
- return vaddr;
+ return (void*) vaddr;
}
-extern inline void kunmap_atomic(unsigned long vaddr, enum km_type type)
+static inline void kunmap_atomic(void *kvaddr, enum km_type type)
{
#if HIGHMEM_DEBUG
+ unsigned long vaddr = (unsigned long) kvaddr;
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
if (vaddr < FIXADDR_START) // FIXME
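
The highmem change is a pure type cleanup: kmap() and kmap_atomic() now return
void * so that callers stop casting from unsigned long. A hypothetical caller
under the new signatures, kernel context and sketch only:

    #include <linux/highmem.h>
    #include <linux/string.h>

    static void zero_highpage(struct page *page)
    {
        void *vaddr = kmap(page);        /* may sleep; BUG()s in interrupts */

        memset(vaddr, 0, PAGE_SIZE);     /* no (void *) cast needed any more */
        kunmap(page);
    }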
diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h
index f8ebabe1b..04ba635e5 100644
--- a/include/asm-i386/i387.h
+++ b/include/asm-i386/i387.h
@@ -30,6 +30,7 @@ extern void restore_fpu( struct task_struct *tsk );
#define clear_fpu( tsk ) do { \
if ( tsk->flags & PF_USEDFPU ) { \
+ asm volatile("fwait"); \
tsk->flags &= ~PF_USEDFPU; \
stts(); \
} \
diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h
new file mode 100644
index 000000000..61e2fd50e
--- /dev/null
+++ b/include/asm-i386/module.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_I386_MODULE_H
+#define _ASM_I386_MODULE_H
+/*
+ * This file contains the i386 architecture specific module code.
+ */
+
+#define module_map(x) vmalloc(x)
+#define module_unmap(x) vfree(x)
+#define module_arch_init(x) (0)
+
+#endif /* _ASM_I386_MODULE_H */
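
module.h is new because the module_map/module_unmap definitions move here from
pgtable.h (removed just below); each architecture now supplies its own header.
A hedged sketch of how a module loader might use the hooks; alloc_module_image
is hypothetical:

    #include <linux/vmalloc.h>
    #include <asm/module.h>

    static void *alloc_module_image(unsigned long size)
    {
        void *addr = module_map(size);   /* expands to vmalloc(size) on i386 */

        if (!addr)
            return NULL;
        /* copy the module image in; call module_unmap(addr) on failure */
        return addr;
    }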
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 5460e6ccc..1fc0a0b9a 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -340,9 +340,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
#define swp_entry_to_pte(x) ((pte_t) { (x).val })
-#define module_map vmalloc
-#define module_unmap vfree
-
#endif /* !__ASSEMBLY__ */
/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 76ac26cae..9e8e8c5ef 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -13,6 +13,7 @@
#include <asm/page.h>
#include <asm/types.h>
#include <asm/sigcontext.h>
+#include <asm/cpufeature.h>
#include <linux/config.h>
#include <linux/threads.h>
@@ -37,8 +38,8 @@ struct cpuinfo_x86 {
char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
char hard_math;
char rfu;
- int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
- __u32 x86_capability;
+ int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
+ __u32 x86_capability[NCAPINTS];
char x86_vendor_id[16];
char x86_model_id[64];
int x86_cache_size; /* in KB - valid for CPUS which support this
@@ -67,39 +68,6 @@ struct cpuinfo_x86 {
* capabilities of CPUs
*/
-#define X86_FEATURE_FPU 0x00000001 /* onboard FPU */
-#define X86_FEATURE_VME 0x00000002 /* Virtual Mode Extensions */
-#define X86_FEATURE_DE 0x00000004 /* Debugging Extensions */
-#define X86_FEATURE_PSE 0x00000008 /* Page Size Extensions */
-#define X86_FEATURE_TSC 0x00000010 /* Time Stamp Counter */
-#define X86_FEATURE_MSR 0x00000020 /* Model-Specific Registers, RDMSR, WRMSR */
-#define X86_FEATURE_PAE 0x00000040 /* Physical Address Extensions */
-#define X86_FEATURE_MCE 0x00000080 /* Machine Check Exceptions */
-#define X86_FEATURE_CX8 0x00000100 /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC 0x00000200 /* onboard APIC */
-#define X86_FEATURE_10 0x00000400
-#define X86_FEATURE_SEP 0x00000800 /* Fast System Call */
-#define X86_FEATURE_MTRR 0x00001000 /* Memory Type Range Registers */
-#define X86_FEATURE_PGE 0x00002000 /* Page Global Enable */
-#define X86_FEATURE_MCA 0x00004000 /* Machine Check Architecture */
-#define X86_FEATURE_CMOV 0x00008000 /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
-#define X86_FEATURE_PAT 0x00010000 /* Page Attribute Table */
-#define X86_FEATURE_PSE36 0x00020000 /* 36-bit PSEs */
-#define X86_FEATURE_PN 0x00040000
-#define X86_FEATURE_19 0x00080000
-#define X86_FEATURE_20 0x00100000
-#define X86_FEATURE_21 0x00200000
-#define X86_FEATURE_22 0x00400000
-#define X86_FEATURE_MMX 0x00800000 /* Multimedia Extensions */
-#define X86_FEATURE_FXSR 0x01000000 /* FXSAVE and FXRSTOR instructions (fast save and restore of FPU context), and CR4.OSFXSR (OS uses these instructions) available */
-#define X86_FEATURE_XMM 0x02000000 /* Streaming SIMD Extensions */
-#define X86_FEATURE_26 0x04000000
-#define X86_FEATURE_27 0x08000000
-#define X86_FEATURE_28 0x10000000
-#define X86_FEATURE_29 0x20000000
-#define X86_FEATURE_30 0x40000000
-#define X86_FEATURE_AMD3D 0x80000000
-
extern struct cpuinfo_x86 boot_cpu_data;
extern struct tss_struct init_tss[NR_CPUS];
@@ -111,22 +79,15 @@ extern struct cpuinfo_x86 cpu_data[];
#define current_cpu_data boot_cpu_data
#endif
-#define cpu_has_pge \
- (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
-#define cpu_has_pse \
- (boot_cpu_data.x86_capability & X86_FEATURE_PSE)
-#define cpu_has_pae \
- (boot_cpu_data.x86_capability & X86_FEATURE_PAE)
-#define cpu_has_tsc \
- (boot_cpu_data.x86_capability & X86_FEATURE_TSC)
-#define cpu_has_de \
- (boot_cpu_data.x86_capability & X86_FEATURE_DE)
-#define cpu_has_vme \
- (boot_cpu_data.x86_capability & X86_FEATURE_VME)
-#define cpu_has_fxsr \
- (boot_cpu_data.x86_capability & X86_FEATURE_FXSR)
-#define cpu_has_xmm \
- (boot_cpu_data.x86_capability & X86_FEATURE_XMM)
+#define cpu_has_pge (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability))
+#define cpu_has_pse (test_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability))
+#define cpu_has_pae (test_bit(X86_FEATURE_PAE, boot_cpu_data.x86_capability))
+#define cpu_has_tsc (test_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability))
+#define cpu_has_de (test_bit(X86_FEATURE_DE, boot_cpu_data.x86_capability))
+#define cpu_has_vme (test_bit(X86_FEATURE_VME, boot_cpu_data.x86_capability))
+#define cpu_has_fxsr (test_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability))
+#define cpu_has_xmm (test_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability))
+#define cpu_has_fpu (test_bit(X86_FEATURE_FPU, boot_cpu_data.x86_capability))
extern char ignore_irq13;
@@ -135,7 +96,28 @@ extern void print_cpu_info(struct cpuinfo_x86 *);
extern void dodgy_tsc(void);
/*
- * Generic CPUID function
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+
+/*
+ * Generic CPUID function
*/
extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
@@ -147,6 +129,45 @@ extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
: "a" (op));
}
+/*
+ * CPUID functions returning a single datum
+ */
+extern inline unsigned int cpuid_eax(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "a" (op));
+ return eax;
+}
+extern inline unsigned int cpuid_ebx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "a" (op));
+ return ebx;
+}
+extern inline unsigned int cpuid_ecx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "a" (op));
+ return ecx;
+}
+extern inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ __asm__("cpuid"
+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "a" (op));
+ return edx;
+}
/*
* Intel CPU features in CR4
@@ -220,7 +241,11 @@ static inline void clear_in_cr4 (unsigned long mask)
/*
* Bus types (default is ISA, but people can check others with these..)
*/
+#ifdef CONFIG_EISA
extern int EISA_bus;
+#else
+#define EISA_bus (0)
+#endif
extern int MCA_bus;
/* from system description table in BIOS. Mostly for MCA use, but
@@ -441,4 +466,10 @@ struct microcode {
#define MICROCODE_IOCFREE _IO('6',0) /* because it is for P6 */
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+extern inline void rep_nop(void)
+{
+ __asm__ __volatile__("rep;nop");
+}
+
#endif /* __ASM_I386_PROCESSOR_H */
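
The new single-datum helpers make one-off CPUID queries terse. Leaf 0, for
instance, returns the vendor string in EBX, EDX, ECX, in that order; a
hypothetical kernel-context use (show_vendor is illustrative, not part of the
patch):

    #include <linux/kernel.h>
    #include <asm/processor.h>

    static void show_vendor(void)
    {
        char vendor[13];

        *(unsigned int *)&vendor[0] = cpuid_ebx(0);
        *(unsigned int *)&vendor[4] = cpuid_edx(0);
        *(unsigned int *)&vendor[8] = cpuid_ecx(0);
        vendor[12] = '\0';
        printk(KERN_INFO "CPU vendor: %s\n", vendor);  /* e.g. "GenuineIntel" */
    }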
diff --git a/include/asm-i386/xor.h b/include/asm-i386/xor.h
new file mode 100644
index 000000000..6a2230b8f
--- /dev/null
+++ b/include/asm-i386/xor.h
@@ -0,0 +1,858 @@
+/*
+ * include/asm-i386/xor.h
+ *
+ * Optimized RAID-5 checksumming functions for MMX and SSE.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * High-speed RAID5 checksumming functions utilizing MMX instructions.
+ * Copyright (C) 1998 Ingo Molnar.
+ */
+
+#define FPU_SAVE \
+ do { \
+ if (!(current->flags & PF_USEDFPU)) \
+ __asm__ __volatile__ (" clts;\n"); \
+ __asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0])); \
+ } while (0)
+
+#define FPU_RESTORE \
+ do { \
+ __asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0])); \
+ if (!(current->flags & PF_USEDFPU)) \
+ stts(); \
+ } while (0)
+
+#define LD(x,y) " movq 8*("#x")(%1), %%mm"#y" ;\n"
+#define ST(x,y) " movq %%mm"#y", 8*("#x")(%1) ;\n"
+#define XO1(x,y) " pxor 8*("#x")(%2), %%mm"#y" ;\n"
+#define XO2(x,y) " pxor 8*("#x")(%3), %%mm"#y" ;\n"
+#define XO3(x,y) " pxor 8*("#x")(%4), %%mm"#y" ;\n"
+#define XO4(x,y) " pxor 8*("#x")(%5), %%mm"#y" ;\n"
+
+
+static void
+xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ unsigned long lines = bytes >> 7;
+ char fpu_save[108];
+
+ FPU_SAVE;
+
+ __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+ LD(i,0) \
+ LD(i+1,1) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ XO1(i,0) \
+ ST(i,0) \
+ XO1(i+1,1) \
+ ST(i+1,1) \
+ XO1(i+2,2) \
+ ST(i+2,2) \
+ XO1(i+3,3) \
+ ST(i+3,3)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addl $128, %1 ;\n"
+ " addl $128, %2 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "r" (lines),
+ "r" (p1), "r" (p2)
+ : "memory");
+
+ FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ unsigned long lines = bytes >> 7;
+ char fpu_save[108];
+
+ FPU_SAVE;
+
+ __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+ LD(i,0) \
+ LD(i+1,1) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ XO1(i,0) \
+ XO1(i+1,1) \
+ XO1(i+2,2) \
+ XO1(i+3,3) \
+ XO2(i,0) \
+ ST(i,0) \
+ XO2(i+1,1) \
+ ST(i+1,1) \
+ XO2(i+2,2) \
+ ST(i+2,2) \
+ XO2(i+3,3) \
+ ST(i+3,3)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addl $128, %1 ;\n"
+ " addl $128, %2 ;\n"
+ " addl $128, %3 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "r" (lines),
+ "r" (p1), "r" (p2), "r" (p3)
+ : "memory");
+
+ FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ unsigned long lines = bytes >> 7;
+ char fpu_save[108];
+
+ FPU_SAVE;
+
+ __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+ LD(i,0) \
+ LD(i+1,1) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ XO1(i,0) \
+ XO1(i+1,1) \
+ XO1(i+2,2) \
+ XO1(i+3,3) \
+ XO2(i,0) \
+ XO2(i+1,1) \
+ XO2(i+2,2) \
+ XO2(i+3,3) \
+ XO3(i,0) \
+ ST(i,0) \
+ XO3(i+1,1) \
+ ST(i+1,1) \
+ XO3(i+2,2) \
+ ST(i+2,2) \
+ XO3(i+3,3) \
+ ST(i+3,3)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addl $128, %1 ;\n"
+ " addl $128, %2 ;\n"
+ " addl $128, %3 ;\n"
+ " addl $128, %4 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "r" (lines),
+ "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+ : "memory");
+
+ FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ unsigned long lines = bytes >> 7;
+ char fpu_save[108];
+
+ FPU_SAVE;
+
+ __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+ LD(i,0) \
+ LD(i+1,1) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ XO1(i,0) \
+ XO1(i+1,1) \
+ XO1(i+2,2) \
+ XO1(i+3,3) \
+ XO2(i,0) \
+ XO2(i+1,1) \
+ XO2(i+2,2) \
+ XO2(i+3,3) \
+ XO3(i,0) \
+ XO3(i+1,1) \
+ XO3(i+2,2) \
+ XO3(i+3,3) \
+ XO4(i,0) \
+ ST(i,0) \
+ XO4(i+1,1) \
+ ST(i+1,1) \
+ XO4(i+2,2) \
+ ST(i+2,2) \
+ XO4(i+3,3) \
+ ST(i+3,3)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addl $128, %1 ;\n"
+ " addl $128, %2 ;\n"
+ " addl $128, %3 ;\n"
+ " addl $128, %4 ;\n"
+ " addl $128, %5 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "g" (lines),
+ "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+ : "memory");
+
+ FPU_RESTORE;
+}
+
+#undef LD
+#undef XO1
+#undef XO2
+#undef XO3
+#undef XO4
+#undef ST
+#undef BLOCK
+
+static void
+xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ unsigned long lines = bytes >> 6;
+ char fpu_save[108];
+
+ FPU_SAVE;
+
+ __asm__ __volatile__ (
+ " .align 32 ;\n"
+ " 1: ;\n"
+ " movq (%1), %%mm0 ;\n"
+ " movq 8(%1), %%mm1 ;\n"
+ " pxor (%2), %%mm0 ;\n"
+ " movq 16(%1), %%mm2 ;\n"
+ " movq %%mm0, (%1) ;\n"
+ " pxor 8(%2), %%mm1 ;\n"
+ " movq 24(%1), %%mm3 ;\n"
+ " movq %%mm1, 8(%1) ;\n"
+ " pxor 16(%2), %%mm2 ;\n"
+ " movq 32(%1), %%mm4 ;\n"
+ " movq %%mm2, 16(%1) ;\n"
+ " pxor 24(%2), %%mm3 ;\n"
+ " movq 40(%1), %%mm5 ;\n"
+ " movq %%mm3, 24(%1) ;\n"
+ " pxor 32(%2), %%mm4 ;\n"
+ " movq 48(%1), %%mm6 ;\n"
+ " movq %%mm4, 32(%1) ;\n"
+ " pxor 40(%2), %%mm5 ;\n"
+ " movq 56(%1), %%mm7 ;\n"
+ " movq %%mm5, 40(%1) ;\n"
+ " pxor 48(%2), %%mm6 ;\n"
+ " pxor 56(%2), %%mm7 ;\n"
+ " movq %%mm6, 48(%1) ;\n"
+ " movq %%mm7, 56(%1) ;\n"
+
+ " addl $64, %1 ;\n"
+ " addl $64, %2 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "r" (lines),
+ "r" (p1), "r" (p2)
+ : "memory");
+
+ FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ unsigned long lines = bytes >> 6;
+ char fpu_save[108];
+
+ FPU_SAVE;
+
+ __asm__ __volatile__ (
+ " .align 32,0x90 ;\n"
+ " 1: ;\n"
+ " movq (%1), %%mm0 ;\n"
+ " movq 8(%1), %%mm1 ;\n"
+ " pxor (%2), %%mm0 ;\n"
+ " movq 16(%1), %%mm2 ;\n"
+ " pxor 8(%2), %%mm1 ;\n"
+ " pxor (%3), %%mm0 ;\n"
+ " pxor 16(%2), %%mm2 ;\n"
+ " movq %%mm0, (%1) ;\n"
+ " pxor 8(%3), %%mm1 ;\n"
+ " pxor 16(%3), %%mm2 ;\n"
+ " movq 24(%1), %%mm3 ;\n"
+ " movq %%mm1, 8(%1) ;\n"
+ " movq 32(%1), %%mm4 ;\n"
+ " movq 40(%1), %%mm5 ;\n"
+ " pxor 24(%2), %%mm3 ;\n"
+ " movq %%mm2, 16(%1) ;\n"
+ " pxor 32(%2), %%mm4 ;\n"
+ " pxor 24(%3), %%mm3 ;\n"
+ " pxor 40(%2), %%mm5 ;\n"
+ " movq %%mm3, 24(%1) ;\n"
+ " pxor 32(%3), %%mm4 ;\n"
+ " pxor 40(%3), %%mm5 ;\n"
+ " movq 48(%1), %%mm6 ;\n"
+ " movq %%mm4, 32(%1) ;\n"
+ " movq 56(%1), %%mm7 ;\n"
+ " pxor 48(%2), %%mm6 ;\n"
+ " movq %%mm5, 40(%1) ;\n"
+ " pxor 56(%2), %%mm7 ;\n"
+ " pxor 48(%3), %%mm6 ;\n"
+ " pxor 56(%3), %%mm7 ;\n"
+ " movq %%mm6, 48(%1) ;\n"
+ " movq %%mm7, 56(%1) ;\n"
+
+ " addl $64, %1 ;\n"
+ " addl $64, %2 ;\n"
+ " addl $64, %3 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "r" (lines),
+ "r" (p1), "r" (p2), "r" (p3)
+ : "memory" );
+
+ FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ unsigned long lines = bytes >> 6;
+ char fpu_save[108];
+
+ FPU_SAVE;
+
+ __asm__ __volatile__ (
+ " .align 32,0x90 ;\n"
+ " 1: ;\n"
+ " movq (%1), %%mm0 ;\n"
+ " movq 8(%1), %%mm1 ;\n"
+ " pxor (%2), %%mm0 ;\n"
+ " movq 16(%1), %%mm2 ;\n"
+ " pxor 8(%2), %%mm1 ;\n"
+ " pxor (%3), %%mm0 ;\n"
+ " pxor 16(%2), %%mm2 ;\n"
+ " pxor 8(%3), %%mm1 ;\n"
+ " pxor (%4), %%mm0 ;\n"
+ " movq 24(%1), %%mm3 ;\n"
+ " pxor 16(%3), %%mm2 ;\n"
+ " pxor 8(%4), %%mm1 ;\n"
+ " movq %%mm0, (%1) ;\n"
+ " movq 32(%1), %%mm4 ;\n"
+ " pxor 24(%2), %%mm3 ;\n"
+ " pxor 16(%4), %%mm2 ;\n"
+ " movq %%mm1, 8(%1) ;\n"
+ " movq 40(%1), %%mm5 ;\n"
+ " pxor 32(%2), %%mm4 ;\n"
+ " pxor 24(%3), %%mm3 ;\n"
+ " movq %%mm2, 16(%1) ;\n"
+ " pxor 40(%2), %%mm5 ;\n"
+ " pxor 32(%3), %%mm4 ;\n"
+ " pxor 24(%4), %%mm3 ;\n"
+ " movq %%mm3, 24(%1) ;\n"
+ " movq 56(%1), %%mm7 ;\n"
+ " movq 48(%1), %%mm6 ;\n"
+ " pxor 40(%3), %%mm5 ;\n"
+ " pxor 32(%4), %%mm4 ;\n"
+ " pxor 48(%2), %%mm6 ;\n"
+ " movq %%mm4, 32(%1) ;\n"
+ " pxor 56(%2), %%mm7 ;\n"
+ " pxor 40(%4), %%mm5 ;\n"
+ " pxor 48(%3), %%mm6 ;\n"
+ " pxor 56(%3), %%mm7 ;\n"
+ " movq %%mm5, 40(%1) ;\n"
+ " pxor 48(%4), %%mm6 ;\n"
+ " pxor 56(%4), %%mm7 ;\n"
+ " movq %%mm6, 48(%1) ;\n"
+ " movq %%mm7, 56(%1) ;\n"
+
+ " addl $64, %1 ;\n"
+ " addl $64, %2 ;\n"
+ " addl $64, %3 ;\n"
+ " addl $64, %4 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "r" (lines),
+ "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+ : "memory");
+
+ FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ unsigned long lines = bytes >> 6;
+ char fpu_save[108];
+
+ FPU_SAVE;
+
+ __asm__ __volatile__ (
+ " .align 32,0x90 ;\n"
+ " 1: ;\n"
+ " movq (%1), %%mm0 ;\n"
+ " movq 8(%1), %%mm1 ;\n"
+ " pxor (%2), %%mm0 ;\n"
+ " pxor 8(%2), %%mm1 ;\n"
+ " movq 16(%1), %%mm2 ;\n"
+ " pxor (%3), %%mm0 ;\n"
+ " pxor 8(%3), %%mm1 ;\n"
+ " pxor 16(%2), %%mm2 ;\n"
+ " pxor (%4), %%mm0 ;\n"
+ " pxor 8(%4), %%mm1 ;\n"
+ " pxor 16(%3), %%mm2 ;\n"
+ " movq 24(%1), %%mm3 ;\n"
+ " pxor (%5), %%mm0 ;\n"
+ " pxor 8(%5), %%mm1 ;\n"
+ " movq %%mm0, (%1) ;\n"
+ " pxor 16(%4), %%mm2 ;\n"
+ " pxor 24(%2), %%mm3 ;\n"
+ " movq %%mm1, 8(%1) ;\n"
+ " pxor 16(%5), %%mm2 ;\n"
+ " pxor 24(%3), %%mm3 ;\n"
+ " movq 32(%1), %%mm4 ;\n"
+ " movq %%mm2, 16(%1) ;\n"
+ " pxor 24(%4), %%mm3 ;\n"
+ " pxor 32(%2), %%mm4 ;\n"
+ " movq 40(%1), %%mm5 ;\n"
+ " pxor 24(%5), %%mm3 ;\n"
+ " pxor 32(%3), %%mm4 ;\n"
+ " pxor 40(%2), %%mm5 ;\n"
+ " movq %%mm3, 24(%1) ;\n"
+ " pxor 32(%4), %%mm4 ;\n"
+ " pxor 40(%3), %%mm5 ;\n"
+ " movq 48(%1), %%mm6 ;\n"
+ " movq 56(%1), %%mm7 ;\n"
+ " pxor 32(%5), %%mm4 ;\n"
+ " pxor 40(%4), %%mm5 ;\n"
+ " pxor 48(%2), %%mm6 ;\n"
+ " pxor 56(%2), %%mm7 ;\n"
+ " movq %%mm4, 32(%1) ;\n"
+ " pxor 48(%3), %%mm6 ;\n"
+ " pxor 56(%3), %%mm7 ;\n"
+ " pxor 40(%5), %%mm5 ;\n"
+ " pxor 48(%4), %%mm6 ;\n"
+ " pxor 56(%4), %%mm7 ;\n"
+ " movq %%mm5, 40(%1) ;\n"
+ " pxor 48(%5), %%mm6 ;\n"
+ " pxor 56(%5), %%mm7 ;\n"
+ " movq %%mm6, 48(%1) ;\n"
+ " movq %%mm7, 56(%1) ;\n"
+
+ " addl $64, %1 ;\n"
+ " addl $64, %2 ;\n"
+ " addl $64, %3 ;\n"
+ " addl $64, %4 ;\n"
+ " addl $64, %5 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "g" (lines),
+ "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+ : "memory");
+
+ FPU_RESTORE;
+}
+
+static struct xor_block_template xor_block_pII_mmx = {
+ name: "pII_mmx",
+ do_2: xor_pII_mmx_2,
+ do_3: xor_pII_mmx_3,
+ do_4: xor_pII_mmx_4,
+ do_5: xor_pII_mmx_5,
+};
+
+static struct xor_block_template xor_block_p5_mmx = {
+ name: "p5_mmx",
+ do_2: xor_p5_mmx_2,
+ do_3: xor_p5_mmx_3,
+ do_4: xor_p5_mmx_4,
+ do_5: xor_p5_mmx_5,
+};
+
+#undef FPU_SAVE
+#undef FPU_RESTORE
+
+/*
+ * Cache avoiding checksumming functions utilizing KNI instructions
+ * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
+ */
+
+#define XMMS_SAVE \
+ __asm__ __volatile__ ( \
+ "movl %%cr0,%0 ;\n\t" \
+ "clts ;\n\t" \
+ "movups %%xmm0,(%1) ;\n\t" \
+ "movups %%xmm1,0x10(%1) ;\n\t" \
+ "movups %%xmm2,0x20(%1) ;\n\t" \
+ "movups %%xmm3,0x30(%1) ;\n\t" \
+ : "=r" (cr0) \
+ : "r" (xmm_save) \
+ : "memory")
+
+#define XMMS_RESTORE \
+ __asm__ __volatile__ ( \
+ "sfence ;\n\t" \
+ "movups (%1),%%xmm0 ;\n\t" \
+ "movups 0x10(%1),%%xmm1 ;\n\t" \
+ "movups 0x20(%1),%%xmm2 ;\n\t" \
+ "movups 0x30(%1),%%xmm3 ;\n\t" \
+ "movl %0,%%cr0 ;\n\t" \
+ : \
+ : "r" (cr0), "r" (xmm_save) \
+ : "memory")
+
+#define OFFS(x) "16*("#x")"
+#define PF0(x) " prefetcht0 "OFFS(x)"(%1) ;\n"
+#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
+#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
+#define PF1(x) " prefetchnta "OFFS(x)"(%2) ;\n"
+#define PF2(x) " prefetchnta "OFFS(x)"(%3) ;\n"
+#define PF3(x) " prefetchnta "OFFS(x)"(%4) ;\n"
+#define PF4(x) " prefetchnta "OFFS(x)"(%5) ;\n"
+#define PF5(x) " prefetchnta "OFFS(x)"(%6) ;\n"
+#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
+#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
+#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
+#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
+#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
+
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ unsigned long lines = bytes >> 8;
+ char xmm_save[16*4];
+ int cr0;
+
+ XMMS_SAVE;
+
+ __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+ LD(i,0) \
+ LD(i+1,1) \
+ PF1(i) \
+ PF1(i+2) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ PF0(i+4) \
+ PF0(i+6) \
+ XO1(i,0) \
+ XO1(i+1,1) \
+ XO1(i+2,2) \
+ XO1(i+3,3) \
+ ST(i,0) \
+ ST(i+1,1) \
+ ST(i+2,2) \
+ ST(i+3,3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addl $256, %1 ;\n"
+ " addl $256, %2 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "r" (lines),
+ "r" (p1), "r" (p2)
+ : "memory");
+
+ XMMS_RESTORE;
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ unsigned long lines = bytes >> 8;
+ char xmm_save[16*4];
+ int cr0;
+
+ XMMS_SAVE;
+
+ __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i+2) \
+ LD(i,0) \
+ LD(i+1,1) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ PF2(i) \
+ PF2(i+2) \
+ PF0(i+4) \
+ PF0(i+6) \
+ XO1(i,0) \
+ XO1(i+1,1) \
+ XO1(i+2,2) \
+ XO1(i+3,3) \
+ XO2(i,0) \
+ XO2(i+1,1) \
+ XO2(i+2,2) \
+ XO2(i+3,3) \
+ ST(i,0) \
+ ST(i+1,1) \
+ ST(i+2,2) \
+ ST(i+3,3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addl $256, %1 ;\n"
+ " addl $256, %2 ;\n"
+ " addl $256, %3 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "r" (lines),
+ "r" (p1), "r"(p2), "r"(p3)
+ : "memory" );
+
+ XMMS_RESTORE;
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ unsigned long lines = bytes >> 8;
+ char xmm_save[16*4];
+ int cr0;
+
+ XMMS_SAVE;
+
+ __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i+2) \
+ LD(i,0) \
+ LD(i+1,1) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ PF2(i) \
+ PF2(i+2) \
+ XO1(i,0) \
+ XO1(i+1,1) \
+ XO1(i+2,2) \
+ XO1(i+3,3) \
+ PF3(i) \
+ PF3(i+2) \
+ PF0(i+4) \
+ PF0(i+6) \
+ XO2(i,0) \
+ XO2(i+1,1) \
+ XO2(i+2,2) \
+ XO2(i+3,3) \
+ XO3(i,0) \
+ XO3(i+1,1) \
+ XO3(i+2,2) \
+ XO3(i+3,3) \
+ ST(i,0) \
+ ST(i+1,1) \
+ ST(i+2,2) \
+ ST(i+3,3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addl $256, %1 ;\n"
+ " addl $256, %2 ;\n"
+ " addl $256, %3 ;\n"
+ " addl $256, %4 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "r" (lines),
+ "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+ : "memory" );
+
+ XMMS_RESTORE;
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ unsigned long lines = bytes >> 8;
+ char xmm_save[16*4];
+ int cr0;
+
+ XMMS_SAVE;
+
+ __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i+2) \
+ LD(i,0) \
+ LD(i+1,1) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ PF2(i) \
+ PF2(i+2) \
+ XO1(i,0) \
+ XO1(i+1,1) \
+ XO1(i+2,2) \
+ XO1(i+3,3) \
+ PF3(i) \
+ PF3(i+2) \
+ XO2(i,0) \
+ XO2(i+1,1) \
+ XO2(i+2,2) \
+ XO2(i+3,3) \
+ PF4(i) \
+ PF4(i+2) \
+ PF0(i+4) \
+ PF0(i+6) \
+ XO3(i,0) \
+ XO3(i+1,1) \
+ XO3(i+2,2) \
+ XO3(i+3,3) \
+ XO4(i,0) \
+ XO4(i+1,1) \
+ XO4(i+2,2) \
+ XO4(i+3,3) \
+ ST(i,0) \
+ ST(i+1,1) \
+ ST(i+2,2) \
+ ST(i+3,3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addl $256, %1 ;\n"
+ " addl $256, %2 ;\n"
+ " addl $256, %3 ;\n"
+ " addl $256, %4 ;\n"
+ " addl $256, %5 ;\n"
+ " decl %0 ;\n"
+ " jnz 1b ;\n"
+ :
+ : "r" (lines),
+ "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+ : "memory");
+
+ XMMS_RESTORE;
+}
+
+static struct xor_block_template xor_block_pIII_sse = {
+ name: "pIII_sse",
+ do_2: xor_sse_2,
+ do_3: xor_sse_3,
+ do_4: xor_sse_4,
+ do_5: xor_sse_5,
+};
+
+/* Also try the generic routines. */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+ do { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_32regs); \
+ if (cpu_has_xmm) \
+ xor_speed(&xor_block_pIII_sse); \
+ if (md_cpu_has_mmx()) { \
+ xor_speed(&xor_block_pII_mmx); \
+ xor_speed(&xor_block_p5_mmx); \
+ } \
+ } while (0)
+
+/* We force the use of the SSE xor block because it can write around L2.
+ We may also be able to load into the L1 only depending on how the cpu
+ deals with a load to a line that is being prefetched. */
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+ (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
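
For reference, every routine in this file computes the same thing as the
trivial C below: the source blocks are XORed into p1, with bytes assumed to be
a multiple of the routine's unrolled stride (64 for p5_mmx, 128 for pII_mmx,
256 for the SSE versions). The assembly variants only change how fast, and how
cache-politely, that happens:

    /* Plain-C reference for the two-source case. */
    static void
    xor_ref_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
    {
        unsigned long words = bytes / sizeof(unsigned long);
        unsigned long i;

        for (i = 0; i < words; i++)
            p1[i] ^= p2[i];              /* the result accumulates in p1 */
    }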