author    | Ralf Baechle <ralf@linux-mips.org> | 2000-11-28 03:58:46 +0000
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-11-28 03:58:46 +0000
commit    | b63ad0882a16a5d28003e57f2b0b81dee3fb322b (patch)
tree      | 0a343ce219e2b8b38a5d702d66032c57b83d9720 /include/asm-i386
parent    | a9d7bff9a84dba79609a0002e5321b74c4d64c64 (diff)
Merge with 2.4.0-test11.
Diffstat (limited to 'include/asm-i386')
-rw-r--r-- | include/asm-i386/bugs.h       | 204
-rw-r--r-- | include/asm-i386/cpufeature.h |  73
-rw-r--r-- | include/asm-i386/elf.h        |   6
-rw-r--r-- | include/asm-i386/highmem.h    |  17
-rw-r--r-- | include/asm-i386/i387.h       |   1
-rw-r--r-- | include/asm-i386/module.h     |  11
-rw-r--r-- | include/asm-i386/pgtable.h    |   3
-rw-r--r-- | include/asm-i386/processor.h  | 135
-rw-r--r-- | include/asm-i386/xor.h        | 858

9 files changed, 1040 insertions, 268 deletions
diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h
index 4b95d09cd..4e77e5d8a 100644
--- a/include/asm-i386/bugs.h
+++ b/include/asm-i386/bugs.h
@@ -147,200 +147,6 @@ static void __init check_popad(void)
 }
 
 /*
- * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
- * misexecution of code under Linux. Owners of such processors should
- * contact AMD for precise details and a CPU swap.
- *
- * See	http://www.mygale.com/~poulot/k6bug.html
- *	http://www.amd.com/K6/k6docs/revgd.html
- *
- * The following test is erm.. interesting. AMD neglected to up
- * the chip setting when fixing the bug but they also tweaked some
- * performance at the same time..
- */
-
-extern void vide(void);
-__asm__(".align 4\nvide: ret");
-
-static void __init check_amd_k6(void)
-{
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-	    boot_cpu_data.x86 == 5 &&
-	    boot_cpu_data.x86_model == 6 &&
-	    boot_cpu_data.x86_mask == 1)
-	{
-		int n;
-		void (*f_vide)(void);
-		unsigned long d, d2;
-
-		printk(KERN_INFO "AMD K6 stepping B detected - ");
-
-#define K6_BUG_LOOP 1000000
-
-		/*
-		 * It looks like AMD fixed the 2.6.2 bug and improved indirect
-		 * calls at the same time.
-		 */
-
-		n = K6_BUG_LOOP;
-		f_vide = vide;
-		rdtscl(d);
-		while (n--)
-			f_vide();
-		rdtscl(d2);
-		d = d2-d;
-
-		/* Knock these two lines out if it debugs out ok */
-		printk(KERN_INFO "K6 BUG %ld %d (Report these if test report is incorrect)\n", d, 20*K6_BUG_LOOP);
-		printk(KERN_INFO "AMD K6 stepping B detected - ");
-		/* -- cut here -- */
-		if (d > 20*K6_BUG_LOOP)
-			printk("system stability may be impaired when more than 32 MB are used.\n");
-		else
-			printk("probably OK (after B9730xxxx).\n");
-		printk(KERN_INFO "Please see http://www.mygale.com/~poulot/k6bug.html\n");
-	}
-}
-
-/*
- * All current models of Pentium and Pentium with MMX technology CPUs
- * have the F0 0F bug, which lets nonpriviledged users lock up the system:
- */
-
-#ifndef CONFIG_M686
-extern void trap_init_f00f_bug(void);
-
-static void __init check_pentium_f00f(void)
-{
-	/*
-	 * Pentium and Pentium MMX
-	 */
-	boot_cpu_data.f00f_bug = 0;
-	if (boot_cpu_data.x86 == 5 && boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
-		printk(KERN_INFO "Intel Pentium with F0 0F bug - workaround enabled.\n");
-		boot_cpu_data.f00f_bug = 1;
-		trap_init_f00f_bug();
-	}
-}
-#endif
-
-/*
- * Perform the Cyrix 5/2 test. A Cyrix won't change
- * the flags, while other 486 chips will.
- */
-
-static inline int test_cyrix_52div(void)
-{
-	unsigned int test;
-
-	__asm__ __volatile__(
-	     "sahf\n\t"		/* clear flags (%eax = 0x0005) */
-	     "div %b2\n\t"	/* divide 5 by 2 */
-	     "lahf"		/* store flags into %ah */
-	     : "=a" (test)
-	     : "0" (5), "q" (2)
-	     : "cc");
-
-	/* AH is 0x02 on Cyrix after the divide.. */
-	return (unsigned char) (test >> 8) == 0x02;
-}
-
-/*
- * Fix cpuid problems with Cyrix CPU's:
- *   -- on the Cx686(L) the cpuid is disabled on power up.
- *   -- braindamaged BIOS disable cpuid on the Cx686MX.
- */
-
-extern unsigned char Cx86_dir0_msb;	/* exported HACK from cyrix_model() */
-
-static void __init check_cx686_cpuid(void)
-{
-	if (boot_cpu_data.cpuid_level == -1 &&
-	    ((Cx86_dir0_msb == 5) || (Cx86_dir0_msb == 3))) {
-		int eax, dummy;
-		unsigned char ccr3, ccr4;
-		__u32 old_cap;
-
-		cli();
-		ccr3 = getCx86(CX86_CCR3);
-		setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* enable MAPEN  */
-		ccr4 = getCx86(CX86_CCR4);
-		setCx86(CX86_CCR4, ccr4 | 0x80);		/* enable cpuid  */
-		setCx86(CX86_CCR3, ccr3);			/* disable MAPEN */
-		sti();
-
-		/* we have up to level 1 available on the Cx6x86(L|MX) */
-		boot_cpu_data.cpuid_level = 1;
-		/* Need to preserve some externally computed capabilities */
-		old_cap = boot_cpu_data.x86_capability & X86_FEATURE_MTRR;
-		cpuid(1, &eax, &dummy, &dummy,
-		      &boot_cpu_data.x86_capability);
-		boot_cpu_data.x86_capability |= old_cap;
-
-		boot_cpu_data.x86 = (eax >> 8) & 15;
-		/*
-		 * we already have a cooked step/rev number from DIR1
-		 * so we don't use the cpuid-provided ones.
-		 */
-	}
-}
-
-/*
- * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
- * BIOSes for compatability with DOS games.  This makes the udelay loop
- * work correctly, and improves performance.
- */
-
-extern void calibrate_delay(void) __init;
-
-static void __init check_cx686_slop(void)
-{
-	if (Cx86_dir0_msb == 3) {
-		unsigned char ccr3, ccr5;
-
-		cli();
-		ccr3 = getCx86(CX86_CCR3);
-		setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* enable MAPEN */
-		ccr5 = getCx86(CX86_CCR5);
-		if (ccr5 & 2)
-			setCx86(CX86_CCR5, ccr5 & 0xfd);	/* reset SLOP */
-		setCx86(CX86_CCR3, ccr3);			/* disable MAPEN */
-		sti();
-
-		if (ccr5 & 2) { /* possible wrong calibration done */
-			printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
-			calibrate_delay();
-			boot_cpu_data.loops_per_sec = loops_per_sec;
-		}
-	}
-}
-
-/*
- * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected
- * by the fact that they preserve the flags across the division of 5/2.
- * PII and PPro exhibit this behavior too, but they have cpuid available.
- */
-
-static void __init check_cyrix_cpu(void)
-{
-	if ((boot_cpu_data.cpuid_level == -1) && (boot_cpu_data.x86 == 4)
-	    && test_cyrix_52div()) {
-
-		strcpy(boot_cpu_data.x86_vendor_id, "CyrixInstead");
-	}
-}
-
-/*
- * In setup.c's cyrix_model() we have set the boot_cpu_data.coma_bug
- * on certain processors that we know contain this bug and now we
- * enable the workaround for it.
- */
-
-static void __init check_cyrix_coma(void)
-{
-}
-
-/*
  * Check whether we are able to run this kernel safely on SMP.
 *
 *  - In order to run on a i386, we need to be compiled for i386
@@ -391,7 +197,7 @@ static void __init check_config(void)
  */
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
-	    && boot_cpu_data.x86_capability & X86_FEATURE_APIC
+	    && test_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability)
 	    && boot_cpu_data.x86 == 5
 	    && boot_cpu_data.x86_model == 2
 	    && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
@@ -409,10 +215,7 @@ static void __init check_config(void)
 
 static void __init check_bugs(void)
 {
-	check_cyrix_cpu();
 	identify_cpu(&boot_cpu_data);
-	check_cx686_cpuid();
-	check_cx686_slop();
 #ifndef CONFIG_SMP
 	printk("CPU: ");
 	print_cpu_info(&boot_cpu_data);
@@ -421,10 +224,5 @@ static void __init check_bugs(void)
 	check_fpu();
 	check_hlt();
 	check_popad();
-	check_amd_k6();
-#ifndef CONFIG_M686
-	check_pentium_f00f();
-#endif
-	check_cyrix_coma();
 	system_utsname.machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 }
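For illustration only (not part of the original commit): the bugs.h hunks above show the conversion pattern this merge applies throughout — x86_capability changes from a single bitmask word to an array of words, so the old bitwise AND test becomes a bit-number test. A minimal sketch of the new-style check, with a hypothetical wrapper name:

	/* Hypothetical sketch of the conversion pattern used in bugs.h.
	 * old: boot_cpu_data.x86_capability & X86_FEATURE_APIC   (mask test)
	 * new: bit-number test against the capability word array.
	 */
	static int example_cpu_has_apic(void)
	{
		return test_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability);
	}
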
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
new file mode 100644
index 000000000..598edbdaf
--- /dev/null
+++ b/include/asm-i386/cpufeature.h
@@ -0,0 +1,73 @@
+/*
+ * cpufeature.h
+ *
+ * Defines x86 CPU feature bits
+ */
+
+#ifndef __ASM_I386_CPUFEATURE_H
+#define __ASM_I386_CPUFEATURE_H
+
+/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */
+#define CPU_FEATURE_P(CAP, FEATURE) test_bit(CAP, X86_FEATURE_##FEATURE ##_BIT)
+
+#define NCAPINTS	4	/* Currently we have 4 32-bit words worth of info */
+
+/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */
+#define X86_FEATURE_FPU		(0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME		(0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE		(0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE		(0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC		(0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR		(0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE		(0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE		(0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_CX8		(0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC	(0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP		(0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR	(0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE		(0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA		(0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV	(0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_PAT		(0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36	(0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN		(0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLSH	(0*32+19) /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DTES	(0*32+21) /* Debug Trace Store */
+#define X86_FEATURE_ACPI	(0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
+					  /* of FPU context), and CR4.OSFXSR available */
+#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP	(0*32+27) /* CPU self snoop */
+#define X86_FEATURE_ACC		(0*32+29) /* Automatic clock control */
+#define X86_FEATURE_IA64	(0*32+30) /* IA-64 processor */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL	(1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_LM		(1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT	(1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW	(1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY	(2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN	(2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI	(2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX	(3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR	(3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR	(3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR	(3*32+ 3) /* Centaur MCRs (= MTRRs) */
+
+#endif /* __ASM_I386_CPUFEATURE_H */
+
+/*
+ * Local Variables:
+ * mode:c
+ * comment-column:42
+ * End:
+ */
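For illustration only (not part of the original commit): each feature constant above packs its capability word and bit position into one number, word*32 + bit. A sketch of how that encoding unpacks; the EXAMPLE_* macros are hypothetical, not kernel API:

	/* Hypothetical helpers: map a packed feature number to word/bit. */
	#define EXAMPLE_FEATURE_WORD(f)	((f) / 32)	/* index into x86_capability[NCAPINTS] */
	#define EXAMPLE_FEATURE_BIT(f)	((f) % 32)	/* bit inside that 32-bit word */

	/* e.g. X86_FEATURE_SYSCALL == 1*32+11: word 1 (the AMD word), bit 11,
	 * which is exactly what test_bit(X86_FEATURE_SYSCALL, cap) reads. */
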
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index 55ffacff1..c8d826232 100644
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -8,6 +8,8 @@
 #include <asm/ptrace.h>
 #include <asm/user.h>
 
+#include <linux/utsname.h>
+
 typedef unsigned long elf_greg_t;
 
 #define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
@@ -84,7 +86,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
    instruction set this CPU supports.  This could be done in user space,
    but it's not easy, and we've already done it here.  */
 
-#define ELF_HWCAP	(boot_cpu_data.x86_capability)
+#define ELF_HWCAP	(boot_cpu_data.x86_capability[0])
 
 /* This yields a string that ld.so will use to load implementation
    specific libraries for optimization.  This is more specific in
@@ -93,7 +95,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
    For the moment, we have only optimizations for the Intel generations,
    but that could change... */
 
-#define ELF_PLATFORM  ("i386\0i486\0i586\0i686"+(((boot_cpu_data.x86>6?6:boot_cpu_data.x86)-3)*5))
+#define ELF_PLATFORM  (system_utsname.machine)
 
 #ifdef __KERNEL__
 #define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX)
diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h
index 8370b7eb6..dfff7fad0 100644
--- a/include/asm-i386/highmem.h
+++ b/include/asm-i386/highmem.h
@@ -53,19 +53,19 @@ extern void kmap_init(void) __init;
 #define PKMAP_NR(virt)  ((virt-PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
 
-extern unsigned long FASTCALL(kmap_high(struct page *page));
+extern void * FASTCALL(kmap_high(struct page *page));
 extern void FASTCALL(kunmap_high(struct page *page));
 
-extern inline unsigned long kmap(struct page *page)
+static inline void *kmap(struct page *page)
 {
 	if (in_interrupt())
 		BUG();
 	if (page < highmem_start_page)
-		return (unsigned long) page_address(page);
+		return page_address(page);
 	return kmap_high(page);
 }
 
-extern inline void kunmap(struct page *page)
+static inline void kunmap(struct page *page)
 {
 	if (in_interrupt())
 		BUG();
@@ -80,13 +80,13 @@ extern inline void kunmap(struct page *page)
  * be used in IRQ contexts, so in some (very limited) cases we need
  * it.
  */
-extern inline unsigned long kmap_atomic(struct page *page, enum km_type type)
+static inline void *kmap_atomic(struct page *page, enum km_type type)
 {
 	enum fixed_addresses idx;
 	unsigned long vaddr;
 
 	if (page < highmem_start_page)
-		return (unsigned long) page_address(page);
+		return page_address(page);
 
 	idx = type + KM_TYPE_NR*smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
@@ -97,12 +97,13 @@ extern inline unsigned long kmap_atomic(struct page *page, enum km_type type)
 	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
 	__flush_tlb_one(vaddr);
 
-	return vaddr;
+	return (void*) vaddr;
 }
 
-extern inline void kunmap_atomic(unsigned long vaddr, enum km_type type)
+static inline void kunmap_atomic(void *kvaddr, enum km_type type)
 {
 #if HIGHMEM_DEBUG
+	unsigned long vaddr = (unsigned long) kvaddr;
 	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
 
 	if (vaddr < FIXADDR_START) // FIXME
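For illustration only (not part of the original commit): the highmem.h change turns kmap()/kmap_atomic() into functions returning void * instead of unsigned long, so callers no longer cast. A minimal sketch of the new calling convention; the function and buffer names are hypothetical:

	/* Hypothetical sketch: copying out of a (possibly high) page with
	 * the new pointer-returning kmap()/kunmap(). */
	static void example_copy_from_page(struct page *page, void *buf, size_t len)
	{
		void *vaddr = kmap(page);	/* now a void *, no cast needed */
		memcpy(buf, vaddr, len);
		kunmap(page);
	}
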
diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h
index f8ebabe1b..04ba635e5 100644
--- a/include/asm-i386/i387.h
+++ b/include/asm-i386/i387.h
@@ -30,6 +30,7 @@ extern void restore_fpu( struct task_struct *tsk );
 
 #define clear_fpu( tsk ) do { \
 	if ( tsk->flags & PF_USEDFPU ) { \
+		asm volatile("fwait"); \
 		tsk->flags &= ~PF_USEDFPU; \
 		stts(); \
 	} \
diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h
new file mode 100644
index 000000000..61e2fd50e
--- /dev/null
+++ b/include/asm-i386/module.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_I386_MODULE_H
+#define _ASM_I386_MODULE_H
+/*
+ * This file contains the i386 architecture specific module code.
+ */
+
+#define module_map(x)		vmalloc(x)
+#define module_unmap(x)		vfree(x)
+#define module_arch_init(x)	(0)
+
+#endif /* _ASM_I386_MODULE_H */
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 5460e6ccc..1fc0a0b9a 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -340,9 +340,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
 #define swp_entry_to_pte(x)		((pte_t) { (x).val })
 
-#define module_map	vmalloc
-#define module_unmap	vfree
-
 #endif /* !__ASSEMBLY__ */
 
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
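For illustration only (not part of the original commit): module_map()/module_unmap() move from pgtable.h into the new <asm/module.h>, where they remain thin wrappers around vmalloc()/vfree(). A sketch of the expected allocate/free pairing; the function name is hypothetical:

	/* Hypothetical sketch: a module image allocated and freed through
	 * the relocated per-architecture macros. */
	static void example_module_alloc(unsigned long size)
	{
		void *image = module_map(size);		/* vmalloc(size) on i386 */
		if (image != NULL)
			module_unmap(image);		/* vfree(image) */
	}
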
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 76ac26cae..9e8e8c5ef 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -13,6 +13,7 @@
 #include <asm/page.h>
 #include <asm/types.h>
 #include <asm/sigcontext.h>
+#include <asm/cpufeature.h>
 #include <linux/config.h>
 #include <linux/threads.h>
 
@@ -37,8 +38,8 @@ struct cpuinfo_x86 {
 	char	hlt_works_ok;	/* Problems on some 486Dx4's and old 386's */
 	char	hard_math;
 	char	rfu;
-	int	cpuid_level;	/* Maximum supported CPUID level, -1=no CPUID */
-	__u32	x86_capability;
+	int	cpuid_level;	/* Maximum supported CPUID level, -1=no CPUID */
+	__u32	x86_capability[NCAPINTS];
 	char	x86_vendor_id[16];
 	char	x86_model_id[64];
 	int	x86_cache_size;	/* in KB - valid for CPUS which support this
@@ -67,39 +68,6 @@ struct cpuinfo_x86 {
  *  capabilities of CPUs
  */
 
-#define X86_FEATURE_FPU		0x00000001	/* onboard FPU */
-#define X86_FEATURE_VME		0x00000002	/* Virtual Mode Extensions */
-#define X86_FEATURE_DE		0x00000004	/* Debugging Extensions */
-#define X86_FEATURE_PSE		0x00000008	/* Page Size Extensions */
-#define X86_FEATURE_TSC		0x00000010	/* Time Stamp Counter */
-#define X86_FEATURE_MSR		0x00000020	/* Model-Specific Registers, RDMSR, WRMSR */
-#define X86_FEATURE_PAE		0x00000040	/* Physical Address Extensions */
-#define X86_FEATURE_MCE		0x00000080	/* Machine Check Exceptions */
-#define X86_FEATURE_CX8		0x00000100	/* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC	0x00000200	/* onboard APIC */
-#define X86_FEATURE_10		0x00000400
-#define X86_FEATURE_SEP		0x00000800	/* Fast System Call */
-#define X86_FEATURE_MTRR	0x00001000	/* Memory Type Range Registers */
-#define X86_FEATURE_PGE		0x00002000	/* Page Global Enable */
-#define X86_FEATURE_MCA		0x00004000	/* Machine Check Architecture */
-#define X86_FEATURE_CMOV	0x00008000	/* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
-#define X86_FEATURE_PAT		0x00010000	/* Page Attribute Table */
-#define X86_FEATURE_PSE36	0x00020000	/* 36-bit PSEs */
-#define X86_FEATURE_PN		0x00040000
-#define X86_FEATURE_19		0x00080000
-#define X86_FEATURE_20		0x00100000
-#define X86_FEATURE_21		0x00200000
-#define X86_FEATURE_22		0x00400000
-#define X86_FEATURE_MMX		0x00800000	/* Multimedia Extensions */
-#define X86_FEATURE_FXSR	0x01000000	/* FXSAVE and FXRSTOR instructions (fast save and restore of FPU context), and CR4.OSFXSR (OS uses these instructions) available */
-#define X86_FEATURE_XMM		0x02000000	/* Streaming SIMD Extensions */
-#define X86_FEATURE_26		0x04000000
-#define X86_FEATURE_27		0x08000000
-#define X86_FEATURE_28		0x10000000
-#define X86_FEATURE_29		0x20000000
-#define X86_FEATURE_30		0x40000000
-#define X86_FEATURE_AMD3D	0x80000000
-
 extern struct cpuinfo_x86 boot_cpu_data;
 extern struct tss_struct init_tss[NR_CPUS];
 
@@ -111,22 +79,15 @@ extern struct cpuinfo_x86 cpu_data[];
 #define current_cpu_data boot_cpu_data
 #endif
 
-#define cpu_has_pge \
-		(boot_cpu_data.x86_capability & X86_FEATURE_PGE)
-#define cpu_has_pse \
-		(boot_cpu_data.x86_capability & X86_FEATURE_PSE)
-#define cpu_has_pae \
-		(boot_cpu_data.x86_capability & X86_FEATURE_PAE)
-#define cpu_has_tsc \
-		(boot_cpu_data.x86_capability & X86_FEATURE_TSC)
-#define cpu_has_de \
-		(boot_cpu_data.x86_capability & X86_FEATURE_DE)
-#define cpu_has_vme \
-		(boot_cpu_data.x86_capability & X86_FEATURE_VME)
-#define cpu_has_fxsr \
-		(boot_cpu_data.x86_capability & X86_FEATURE_FXSR)
-#define cpu_has_xmm \
-		(boot_cpu_data.x86_capability & X86_FEATURE_XMM)
+#define cpu_has_pge	(test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability))
+#define cpu_has_pse	(test_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability))
+#define cpu_has_pae	(test_bit(X86_FEATURE_PAE, boot_cpu_data.x86_capability))
+#define cpu_has_tsc	(test_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability))
+#define cpu_has_de	(test_bit(X86_FEATURE_DE, boot_cpu_data.x86_capability))
+#define cpu_has_vme	(test_bit(X86_FEATURE_VME, boot_cpu_data.x86_capability))
+#define cpu_has_fxsr	(test_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability))
+#define cpu_has_xmm	(test_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability))
+#define cpu_has_fpu	(test_bit(X86_FEATURE_FPU, boot_cpu_data.x86_capability))
 
 extern char ignore_irq13;
 
@@ -135,7 +96,28 @@ extern void print_cpu_info(struct cpuinfo_x86 *);
 extern void dodgy_tsc(void);
 
 /*
- * Generic CPUID function
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF	0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF	0x00000010 /* Auxillary carry Flag */
+#define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF	0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF	0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF	0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF	0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF	0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL	0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT	0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF	0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM	0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC	0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF	0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP	0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID	0x00200000 /* CPUID detection flag */
+
+/*
+ * Generic CPUID function
  */
 extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
 {
@@ -147,6 +129,45 @@ extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
 		: "a" (op));
 }
 
+/*
+ * CPUID functions returning a single datum
+ */
+extern inline unsigned int cpuid_eax(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	__asm__("cpuid"
+		: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+		: "a" (op));
+	return eax;
+}
+extern inline unsigned int cpuid_ebx(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	__asm__("cpuid"
+		: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+		: "a" (op));
+	return ebx;
+}
+extern inline unsigned int cpuid_ecx(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	__asm__("cpuid"
+		: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+		: "a" (op));
+	return ecx;
+}
+extern inline unsigned int cpuid_edx(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	__asm__("cpuid"
+		: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+		: "a" (op));
+	return edx;
+}
 
 /*
  * Intel CPU features in CR4
@@ -220,7 +241,11 @@ static inline void clear_in_cr4 (unsigned long mask)
 /*
  *	Bus types (default is ISA, but people can check others with these..)
  */
+#ifdef CONFIG_EISA
 extern int EISA_bus;
+#else
+#define EISA_bus (0)
+#endif
 extern int MCA_bus;
 
 /* from system description table in BIOS. Mostly for MCA use, but
@@ -441,4 +466,10 @@ struct microcode {
 #define MICROCODE_IOCFREE	_IO('6',0)
 /* because it is for P6 */
 
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+extern inline void rep_nop(void)
+{
+	__asm__ __volatile__("rep;nop");
+}
+
 #endif /* __ASM_I386_PROCESSOR_H */
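For illustration only (not part of the original commit): the new X86_EFLAGS_ID constant and the single-datum CPUID helpers combine naturally — toggling the ID bit in EFLAGS is the classic probe for CPUID support, after which cpuid_eax(0) yields the maximum standard leaf. A hedged sketch; the function name is hypothetical and the asm is i386-only:

	/* Hypothetical sketch: probe for CPUID via EFLAGS.ID, then use the
	 * new cpuid_eax() helper to read the maximum standard level. */
	static int example_max_cpuid_level(void)
	{
		unsigned long f1, f2;

		__asm__ __volatile__("pushfl ; popl %0" : "=r" (f1));
		__asm__ __volatile__("pushl %0 ; popfl" : : "r" (f1 ^ X86_EFLAGS_ID));
		__asm__ __volatile__("pushfl ; popl %0" : "=r" (f2));
		if (!((f1 ^ f2) & X86_EFLAGS_ID))
			return -1;		/* ID bit stuck: no CPUID (i486 or older) */
		return cpuid_eax(0);		/* leaf 0: EAX = highest standard leaf */
	}
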
diff --git a/include/asm-i386/xor.h b/include/asm-i386/xor.h
new file mode 100644
index 000000000..6a2230b8f
--- /dev/null
+++ b/include/asm-i386/xor.h
@@ -0,0 +1,858 @@
+/*
+ * include/asm-i386/xor.h
+ *
+ * Optimized RAID-5 checksumming functions for MMX and SSE.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * High-speed RAID5 checksumming functions utilizing MMX instructions.
+ * Copyright (C) 1998 Ingo Molnar.
+ */
+
+#define FPU_SAVE \
+    do { \
+	if (!(current->flags & PF_USEDFPU)) \
+		__asm__ __volatile__ (" clts;\n"); \
+	__asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0])); \
+    } while (0)
+
+#define FPU_RESTORE \
+    do { \
+	__asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0])); \
+	if (!(current->flags & PF_USEDFPU)) \
+		stts(); \
+    } while (0)
+
+#define LD(x,y)		" movq 8*("#x")(%1), %%mm"#y" ;\n"
+#define ST(x,y)		" movq %%mm"#y", 8*("#x")(%1) ;\n"
+#define XO1(x,y)	" pxor 8*("#x")(%2), %%mm"#y" ;\n"
+#define XO2(x,y)	" pxor 8*("#x")(%3), %%mm"#y" ;\n"
+#define XO3(x,y)	" pxor 8*("#x")(%4), %%mm"#y" ;\n"
+#define XO4(x,y)	" pxor 8*("#x")(%5), %%mm"#y" ;\n"
+
+
+static void
+xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	unsigned long lines = bytes >> 7;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+	LD(i,0) \
+	LD(i+1,1) \
+	LD(i+2,2) \
+	LD(i+3,3) \
+	XO1(i,0) \
+	ST(i,0) \
+	XO1(i+1,1) \
+	ST(i+1,1) \
+	XO1(i+2,2) \
+	ST(i+2,2) \
+	XO1(i+3,3) \
+	ST(i+3,3)
+
+	" .align 32 ;\n"
+	" 1: ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	" addl $128, %1 ;\n"
+	" addl $128, %2 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "r" (lines),
+	  "r" (p1), "r" (p2)
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	      unsigned long *p3)
+{
+	unsigned long lines = bytes >> 7;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+	LD(i,0) \
+	LD(i+1,1) \
+	LD(i+2,2) \
+	LD(i+3,3) \
+	XO1(i,0) \
+	XO1(i+1,1) \
+	XO1(i+2,2) \
+	XO1(i+3,3) \
+	XO2(i,0) \
+	ST(i,0) \
+	XO2(i+1,1) \
+	ST(i+1,1) \
+	XO2(i+2,2) \
+	ST(i+2,2) \
+	XO2(i+3,3) \
+	ST(i+3,3)
+
+	" .align 32 ;\n"
+	" 1: ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	" addl $128, %1 ;\n"
+	" addl $128, %2 ;\n"
+	" addl $128, %3 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "r" (lines),
+	  "r" (p1), "r" (p2), "r" (p3)
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	      unsigned long *p3, unsigned long *p4)
+{
+	unsigned long lines = bytes >> 7;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+	LD(i,0) \
+	LD(i+1,1) \
+	LD(i+2,2) \
+	LD(i+3,3) \
+	XO1(i,0) \
+	XO1(i+1,1) \
+	XO1(i+2,2) \
+	XO1(i+3,3) \
+	XO2(i,0) \
+	XO2(i+1,1) \
+	XO2(i+2,2) \
+	XO2(i+3,3) \
+	XO3(i,0) \
+	ST(i,0) \
+	XO3(i+1,1) \
+	ST(i+1,1) \
+	XO3(i+2,2) \
+	ST(i+2,2) \
+	XO3(i+3,3) \
+	ST(i+3,3)
+
+	" .align 32 ;\n"
+	" 1: ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	" addl $128, %1 ;\n"
+	" addl $128, %2 ;\n"
+	" addl $128, %3 ;\n"
+	" addl $128, %4 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "r" (lines),
+	  "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	unsigned long lines = bytes >> 7;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+	LD(i,0) \
+	LD(i+1,1) \
+	LD(i+2,2) \
+	LD(i+3,3) \
+	XO1(i,0) \
+	XO1(i+1,1) \
+	XO1(i+2,2) \
+	XO1(i+3,3) \
+	XO2(i,0) \
+	XO2(i+1,1) \
+	XO2(i+2,2) \
+	XO2(i+3,3) \
+	XO3(i,0) \
+	XO3(i+1,1) \
+	XO3(i+2,2) \
+	XO3(i+3,3) \
+	XO4(i,0) \
+	ST(i,0) \
+	XO4(i+1,1) \
+	ST(i+1,1) \
+	XO4(i+2,2) \
+	ST(i+2,2) \
+	XO4(i+3,3) \
+	ST(i+3,3)
+
+	" .align 32 ;\n"
+	" 1: ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	" addl $128, %1 ;\n"
+	" addl $128, %2 ;\n"
+	" addl $128, %3 ;\n"
+	" addl $128, %4 ;\n"
+	" addl $128, %5 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "g" (lines),
+	  "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+#undef LD
+#undef XO1
+#undef XO2
+#undef XO3
+#undef XO4
+#undef ST
+#undef BLOCK
+
+static void
+xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	unsigned long lines = bytes >> 6;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+	" .align 32 ;\n"
+	" 1: ;\n"
+	" movq (%1), %%mm0 ;\n"
+	" movq 8(%1), %%mm1 ;\n"
+	" pxor (%2), %%mm0 ;\n"
+	" movq 16(%1), %%mm2 ;\n"
+	" movq %%mm0, (%1) ;\n"
+	" pxor 8(%2), %%mm1 ;\n"
+	" movq 24(%1), %%mm3 ;\n"
+	" movq %%mm1, 8(%1) ;\n"
+	" pxor 16(%2), %%mm2 ;\n"
+	" movq 32(%1), %%mm4 ;\n"
+	" movq %%mm2, 16(%1) ;\n"
+	" pxor 24(%2), %%mm3 ;\n"
+	" movq 40(%1), %%mm5 ;\n"
+	" movq %%mm3, 24(%1) ;\n"
+	" pxor 32(%2), %%mm4 ;\n"
+	" movq 48(%1), %%mm6 ;\n"
+	" movq %%mm4, 32(%1) ;\n"
+	" pxor 40(%2), %%mm5 ;\n"
+	" movq 56(%1), %%mm7 ;\n"
+	" movq %%mm5, 40(%1) ;\n"
+	" pxor 48(%2), %%mm6 ;\n"
+	" pxor 56(%2), %%mm7 ;\n"
+	" movq %%mm6, 48(%1) ;\n"
+	" movq %%mm7, 56(%1) ;\n"
+
+	" addl $64, %1 ;\n"
+	" addl $64, %2 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "r" (lines),
+	  "r" (p1), "r" (p2)
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	     unsigned long *p3)
+{
+	unsigned long lines = bytes >> 6;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+	" .align 32,0x90 ;\n"
+	" 1: ;\n"
+	" movq (%1), %%mm0 ;\n"
+	" movq 8(%1), %%mm1 ;\n"
+	" pxor (%2), %%mm0 ;\n"
+	" movq 16(%1), %%mm2 ;\n"
+	" pxor 8(%2), %%mm1 ;\n"
+	" pxor (%3), %%mm0 ;\n"
+	" pxor 16(%2), %%mm2 ;\n"
+	" movq %%mm0, (%1) ;\n"
+	" pxor 8(%3), %%mm1 ;\n"
+	" pxor 16(%3), %%mm2 ;\n"
+	" movq 24(%1), %%mm3 ;\n"
+	" movq %%mm1, 8(%1) ;\n"
+	" movq 32(%1), %%mm4 ;\n"
+	" movq 40(%1), %%mm5 ;\n"
+	" pxor 24(%2), %%mm3 ;\n"
+	" movq %%mm2, 16(%1) ;\n"
+	" pxor 32(%2), %%mm4 ;\n"
+	" pxor 24(%3), %%mm3 ;\n"
+	" pxor 40(%2), %%mm5 ;\n"
+	" movq %%mm3, 24(%1) ;\n"
+	" pxor 32(%3), %%mm4 ;\n"
+	" pxor 40(%3), %%mm5 ;\n"
+	" movq 48(%1), %%mm6 ;\n"
+	" movq %%mm4, 32(%1) ;\n"
+	" movq 56(%1), %%mm7 ;\n"
+	" pxor 48(%2), %%mm6 ;\n"
+	" movq %%mm5, 40(%1) ;\n"
+	" pxor 56(%2), %%mm7 ;\n"
+	" pxor 48(%3), %%mm6 ;\n"
+	" pxor 56(%3), %%mm7 ;\n"
+	" movq %%mm6, 48(%1) ;\n"
+	" movq %%mm7, 56(%1) ;\n"
+
+	" addl $64, %1 ;\n"
+	" addl $64, %2 ;\n"
+	" addl $64, %3 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "r" (lines),
+	  "r" (p1), "r" (p2), "r" (p3)
+	: "memory" );
+
+	FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	     unsigned long *p3, unsigned long *p4)
+{
+	unsigned long lines = bytes >> 6;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+	" .align 32,0x90 ;\n"
+	" 1: ;\n"
+	" movq (%1), %%mm0 ;\n"
+	" movq 8(%1), %%mm1 ;\n"
+	" pxor (%2), %%mm0 ;\n"
+	" movq 16(%1), %%mm2 ;\n"
+	" pxor 8(%2), %%mm1 ;\n"
+	" pxor (%3), %%mm0 ;\n"
+	" pxor 16(%2), %%mm2 ;\n"
+	" pxor 8(%3), %%mm1 ;\n"
+	" pxor (%4), %%mm0 ;\n"
+	" movq 24(%1), %%mm3 ;\n"
+	" pxor 16(%3), %%mm2 ;\n"
+	" pxor 8(%4), %%mm1 ;\n"
+	" movq %%mm0, (%1) ;\n"
+	" movq 32(%1), %%mm4 ;\n"
+	" pxor 24(%2), %%mm3 ;\n"
+	" pxor 16(%4), %%mm2 ;\n"
+	" movq %%mm1, 8(%1) ;\n"
+	" movq 40(%1), %%mm5 ;\n"
+	" pxor 32(%2), %%mm4 ;\n"
+	" pxor 24(%3), %%mm3 ;\n"
+	" movq %%mm2, 16(%1) ;\n"
+	" pxor 40(%2), %%mm5 ;\n"
+	" pxor 32(%3), %%mm4 ;\n"
+	" pxor 24(%4), %%mm3 ;\n"
+	" movq %%mm3, 24(%1) ;\n"
+	" movq 56(%1), %%mm7 ;\n"
+	" movq 48(%1), %%mm6 ;\n"
+	" pxor 40(%3), %%mm5 ;\n"
+	" pxor 32(%4), %%mm4 ;\n"
+	" pxor 48(%2), %%mm6 ;\n"
+	" movq %%mm4, 32(%1) ;\n"
+	" pxor 56(%2), %%mm7 ;\n"
+	" pxor 40(%4), %%mm5 ;\n"
+	" pxor 48(%3), %%mm6 ;\n"
+	" pxor 56(%3), %%mm7 ;\n"
+	" movq %%mm5, 40(%1) ;\n"
+	" pxor 48(%4), %%mm6 ;\n"
+	" pxor 56(%4), %%mm7 ;\n"
+	" movq %%mm6, 48(%1) ;\n"
+	" movq %%mm7, 56(%1) ;\n"
+
+	" addl $64, %1 ;\n"
+	" addl $64, %2 ;\n"
+	" addl $64, %3 ;\n"
+	" addl $64, %4 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "r" (lines),
+	  "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	unsigned long lines = bytes >> 6;
+	char fpu_save[108];
+
+	FPU_SAVE;
+
+	__asm__ __volatile__ (
+	" .align 32,0x90 ;\n"
+	" 1: ;\n"
+	" movq (%1), %%mm0 ;\n"
+	" movq 8(%1), %%mm1 ;\n"
+	" pxor (%2), %%mm0 ;\n"
+	" pxor 8(%2), %%mm1 ;\n"
+	" movq 16(%1), %%mm2 ;\n"
+	" pxor (%3), %%mm0 ;\n"
+	" pxor 8(%3), %%mm1 ;\n"
+	" pxor 16(%2), %%mm2 ;\n"
+	" pxor (%4), %%mm0 ;\n"
+	" pxor 8(%4), %%mm1 ;\n"
+	" pxor 16(%3), %%mm2 ;\n"
+	" movq 24(%1), %%mm3 ;\n"
+	" pxor (%5), %%mm0 ;\n"
+	" pxor 8(%5), %%mm1 ;\n"
+	" movq %%mm0, (%1) ;\n"
+	" pxor 16(%4), %%mm2 ;\n"
+	" pxor 24(%2), %%mm3 ;\n"
+	" movq %%mm1, 8(%1) ;\n"
+	" pxor 16(%5), %%mm2 ;\n"
+	" pxor 24(%3), %%mm3 ;\n"
+	" movq 32(%1), %%mm4 ;\n"
+	" movq %%mm2, 16(%1) ;\n"
+	" pxor 24(%4), %%mm3 ;\n"
+	" pxor 32(%2), %%mm4 ;\n"
+	" movq 40(%1), %%mm5 ;\n"
+	" pxor 24(%5), %%mm3 ;\n"
+	" pxor 32(%3), %%mm4 ;\n"
+	" pxor 40(%2), %%mm5 ;\n"
+	" movq %%mm3, 24(%1) ;\n"
+	" pxor 32(%4), %%mm4 ;\n"
+	" pxor 40(%3), %%mm5 ;\n"
+	" movq 48(%1), %%mm6 ;\n"
+	" movq 56(%1), %%mm7 ;\n"
+	" pxor 32(%5), %%mm4 ;\n"
+	" pxor 40(%4), %%mm5 ;\n"
+	" pxor 48(%2), %%mm6 ;\n"
+	" pxor 56(%2), %%mm7 ;\n"
+	" movq %%mm4, 32(%1) ;\n"
+	" pxor 48(%3), %%mm6 ;\n"
+	" pxor 56(%3), %%mm7 ;\n"
+	" pxor 40(%5), %%mm5 ;\n"
+	" pxor 48(%4), %%mm6 ;\n"
+	" pxor 56(%4), %%mm7 ;\n"
+	" movq %%mm5, 40(%1) ;\n"
+	" pxor 48(%5), %%mm6 ;\n"
+	" pxor 56(%5), %%mm7 ;\n"
+	" movq %%mm6, 48(%1) ;\n"
+	" movq %%mm7, 56(%1) ;\n"
+
+	" addl $64, %1 ;\n"
+	" addl $64, %2 ;\n"
+	" addl $64, %3 ;\n"
+	" addl $64, %4 ;\n"
+	" addl $64, %5 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "g" (lines),
+	  "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+	: "memory");
+
+	FPU_RESTORE;
+}
+
+static struct xor_block_template xor_block_pII_mmx = {
+	name: "pII_mmx",
+	do_2: xor_pII_mmx_2,
+	do_3: xor_pII_mmx_3,
+	do_4: xor_pII_mmx_4,
+	do_5: xor_pII_mmx_5,
+};
+
+static struct xor_block_template xor_block_p5_mmx = {
+	name: "p5_mmx",
+	do_2: xor_p5_mmx_2,
+	do_3: xor_p5_mmx_3,
+	do_4: xor_p5_mmx_4,
+	do_5: xor_p5_mmx_5,
+};
+
+#undef FPU_SAVE
+#undef FPU_RESTORE
+
+/*
+ * Cache avoiding checksumming functions utilizing KNI instructions
+ * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
+ */
+
+#define XMMS_SAVE \
+	__asm__ __volatile__ ( \
+		"movl %%cr0,%0 ;\n\t" \
+		"clts ;\n\t" \
+		"movups %%xmm0,(%1) ;\n\t" \
+		"movups %%xmm1,0x10(%1) ;\n\t" \
+		"movups %%xmm2,0x20(%1) ;\n\t" \
+		"movups %%xmm3,0x30(%1) ;\n\t" \
+	: "=r" (cr0) \
+	: "r" (xmm_save) \
+	: "memory")
+
+#define XMMS_RESTORE \
+	__asm__ __volatile__ ( \
+		"sfence ;\n\t" \
+		"movups (%1),%%xmm0 ;\n\t" \
+		"movups 0x10(%1),%%xmm1 ;\n\t" \
+		"movups 0x20(%1),%%xmm2 ;\n\t" \
+		"movups 0x30(%1),%%xmm3 ;\n\t" \
+		"movl %0,%%cr0 ;\n\t" \
+	: \
+	: "r" (cr0), "r" (xmm_save) \
+	: "memory")
+
+#define OFFS(x)		"16*("#x")"
+#define PF0(x)		" prefetcht0 "OFFS(x)"(%1) ;\n"
+#define LD(x,y)		" movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
+#define ST(x,y)		" movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
+#define PF1(x)		" prefetchnta "OFFS(x)"(%2) ;\n"
+#define PF2(x)		" prefetchnta "OFFS(x)"(%3) ;\n"
+#define PF3(x)		" prefetchnta "OFFS(x)"(%4) ;\n"
+#define PF4(x)		" prefetchnta "OFFS(x)"(%5) ;\n"
+#define PF5(x)		" prefetchnta "OFFS(x)"(%6) ;\n"
+#define XO1(x,y)	" xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
+#define XO2(x,y)	" xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
+#define XO3(x,y)	" xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
+#define XO4(x,y)	" xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
+#define XO5(x,y)	" xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
+
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	unsigned long lines = bytes >> 8;
+	char xmm_save[16*4];
+	int cr0;
+
+	XMMS_SAVE;
+
+	__asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+	LD(i,0) \
+	LD(i+1,1) \
+	PF1(i) \
+	PF1(i+2) \
+	LD(i+2,2) \
+	LD(i+3,3) \
+	PF0(i+4) \
+	PF0(i+6) \
+	XO1(i,0) \
+	XO1(i+1,1) \
+	XO1(i+2,2) \
+	XO1(i+3,3) \
+	ST(i,0) \
+	ST(i+1,1) \
+	ST(i+2,2) \
+	ST(i+3,3) \
+
+
+	PF0(0)
+	PF0(2)
+
+	" .align 32 ;\n"
+	" 1: ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	" addl $256, %1 ;\n"
+	" addl $256, %2 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "r" (lines),
+	  "r" (p1), "r" (p2)
+	: "memory");
+
+	XMMS_RESTORE;
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3)
+{
+	unsigned long lines = bytes >> 8;
+	char xmm_save[16*4];
+	int cr0;
+
+	XMMS_SAVE;
+
+	__asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+	PF1(i) \
+	PF1(i+2) \
+	LD(i,0) \
+	LD(i+1,1) \
+	LD(i+2,2) \
+	LD(i+3,3) \
+	PF2(i) \
+	PF2(i+2) \
+	PF0(i+4) \
+	PF0(i+6) \
+	XO1(i,0) \
+	XO1(i+1,1) \
+	XO1(i+2,2) \
+	XO1(i+3,3) \
+	XO2(i,0) \
+	XO2(i+1,1) \
+	XO2(i+2,2) \
+	XO2(i+3,3) \
+	ST(i,0) \
+	ST(i+1,1) \
+	ST(i+2,2) \
+	ST(i+3,3) \
+
+
+	PF0(0)
+	PF0(2)
+
+	" .align 32 ;\n"
+	" 1: ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	" addl $256, %1 ;\n"
+	" addl $256, %2 ;\n"
+	" addl $256, %3 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "r" (lines),
+	  "r" (p1), "r"(p2), "r"(p3)
+	: "memory" );
+
+	XMMS_RESTORE;
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4)
+{
+	unsigned long lines = bytes >> 8;
+	char xmm_save[16*4];
+	int cr0;
+
+	XMMS_SAVE;
+
+	__asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+	PF1(i) \
+	PF1(i+2) \
+	LD(i,0) \
+	LD(i+1,1) \
+	LD(i+2,2) \
+	LD(i+3,3) \
+	PF2(i) \
+	PF2(i+2) \
+	XO1(i,0) \
+	XO1(i+1,1) \
+	XO1(i+2,2) \
+	XO1(i+3,3) \
+	PF3(i) \
+	PF3(i+2) \
+	PF0(i+4) \
+	PF0(i+6) \
+	XO2(i,0) \
+	XO2(i+1,1) \
+	XO2(i+2,2) \
+	XO2(i+3,3) \
+	XO3(i,0) \
+	XO3(i+1,1) \
+	XO3(i+2,2) \
+	XO3(i+3,3) \
+	ST(i,0) \
+	ST(i+1,1) \
+	ST(i+2,2) \
+	ST(i+3,3) \
+
+
+	PF0(0)
+	PF0(2)
+
+	" .align 32 ;\n"
+	" 1: ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	" addl $256, %1 ;\n"
+	" addl $256, %2 ;\n"
+	" addl $256, %3 ;\n"
+	" addl $256, %4 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "r" (lines),
+	  "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+	: "memory" );
+
+	XMMS_RESTORE;
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	unsigned long lines = bytes >> 8;
+	char xmm_save[16*4];
+	int cr0;
+
+	XMMS_SAVE;
+
+	__asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+	PF1(i) \
+	PF1(i+2) \
+	LD(i,0) \
+	LD(i+1,1) \
+	LD(i+2,2) \
+	LD(i+3,3) \
+	PF2(i) \
+	PF2(i+2) \
+	XO1(i,0) \
+	XO1(i+1,1) \
+	XO1(i+2,2) \
+	XO1(i+3,3) \
+	PF3(i) \
+	PF3(i+2) \
+	XO2(i,0) \
+	XO2(i+1,1) \
+	XO2(i+2,2) \
+	XO2(i+3,3) \
+	PF4(i) \
+	PF4(i+2) \
+	PF0(i+4) \
+	PF0(i+6) \
+	XO3(i,0) \
+	XO3(i+1,1) \
+	XO3(i+2,2) \
+	XO3(i+3,3) \
+	XO4(i,0) \
+	XO4(i+1,1) \
+	XO4(i+2,2) \
+	XO4(i+3,3) \
+	ST(i,0) \
+	ST(i+1,1) \
+	ST(i+2,2) \
+	ST(i+3,3) \
+
+
+	PF0(0)
+	PF0(2)
+
+	" .align 32 ;\n"
+	" 1: ;\n"
+
+	BLOCK(0)
+	BLOCK(4)
+	BLOCK(8)
+	BLOCK(12)
+
+	" addl $256, %1 ;\n"
+	" addl $256, %2 ;\n"
+	" addl $256, %3 ;\n"
+	" addl $256, %4 ;\n"
+	" addl $256, %5 ;\n"
+	" decl %0 ;\n"
+	" jnz 1b ;\n"
+	:
+	: "r" (lines),
+	  "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+	: "memory");
+
+	XMMS_RESTORE;
+}
+
+static struct xor_block_template xor_block_pIII_sse = {
+	name: "pIII_sse",
+	do_2: xor_sse_2,
+	do_3: xor_sse_3,
+	do_4: xor_sse_4,
+	do_5: xor_sse_5,
+};
+
+/* Also try the generic routines. */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+	do { \
+		xor_speed(&xor_block_8regs); \
+		xor_speed(&xor_block_32regs); \
+		if (cpu_has_xmm) \
+			xor_speed(&xor_block_pIII_sse); \
+		if (md_cpu_has_mmx()) { \
+			xor_speed(&xor_block_pII_mmx); \
+			xor_speed(&xor_block_p5_mmx); \
+		} \
+	} while (0)
+
+/* We force the use of the SSE xor block because it can write around L2.
+   We may also be able to load into the L1 only depending on how the cpu
+   deals with a load to a line that is being prefetched. */
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+	(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
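
For illustration only (not part of the original commit): the RAID core times every template registered through XOR_TRY_TEMPLATES with xor_speed() and then calls the winner through its do_2..do_5 members; XOR_SELECT_TEMPLATE above overrides that choice with the SSE version whenever cpu_has_xmm. A direct call would look roughly like this; the buffer setup is hypothetical, and the SSE routines assume 16-byte-aligned buffers with a byte count that is a multiple of the 256-byte inner loop:

	/* Hypothetical sketch: XOR two 512-byte blocks (p1 ^= p2) directly
	 * through a template's do_2 method, bypassing the speed probe. */
	static void example_xor_block(unsigned long *p1, unsigned long *p2)
	{
		xor_block_pIII_sse.do_2(512, p1, p2);	/* 512 = 2 * 256-byte lines */
	}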