diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-06-19 22:45:37 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-06-19 22:45:37 +0000 |
commit | 6d403070f28cd44860fdb3a53be5da0275c65cf4 (patch) | |
tree | 0d0e7fe7b5fb7568d19e11d7d862b77a866ce081 /include/asm-i386 | |
parent | ecf1bf5f6c2e668d03b0a9fb026db7aa41e292e1 (diff) |
Merge with 2.4.0-test1-ac21 + pile of MIPS cleanups to make merging
possible. Chainsawed RM200 kernel to compile again. Jazz machine
status unknown.
Diffstat (limited to 'include/asm-i386')
-rw-r--r-- | include/asm-i386/bugs.h | 53 |
-rw-r--r-- | include/asm-i386/processor.h | 180 |
-rw-r--r-- | include/asm-i386/ptrace.h | 2 |
-rw-r--r-- | include/asm-i386/semaphore.h | 3 |
-rw-r--r-- | include/asm-i386/sigcontext.h | 53 |
-rw-r--r-- | include/asm-i386/user.h | 33 |
6 files changed, 216 insertions, 108 deletions
diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h index 34df19c05..80ba0d2e4 100644 --- a/include/asm-i386/bugs.h +++ b/include/asm-i386/bugs.h @@ -8,6 +8,9 @@ * <rreilova@ececs.uc.edu> * - Channing Corn (tests & fixes), * - Andrew D. Balsa (code cleanup). + * + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 */ /* @@ -46,7 +49,7 @@ static int __init no_387(char *s) __setup("no387", no_387); -static char __initdata fpu_error = 0; +static char fpu_error __initdata = 0; static void __init copro_timeout(void) { @@ -59,8 +62,13 @@ static void __init copro_timeout(void) outb_p(0,0xf0); } -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; +static double x __initdata = 4195835.0; +static double y __initdata = 3145727.0; + +#ifdef CONFIG_X86_XMM +static float zero[4] __initdata = { 0.0, 0.0, 0.0, 0.0 }; +static float one[4] __initdata = { 1.0, 1.0, 1.0, 1.0 }; +#endif static void __init check_fpu(void) { @@ -139,6 +147,37 @@ static void __init check_fpu(void) printk("OK, FPU using exception 16 error reporting.\n"); else printk("Hmm, FPU using exception 16 error reporting with FDIV bug.\n"); + +#ifdef CONFIG_X86_FXSR + /* + * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. + */ + if (offsetof(struct task_struct, thread.i387.hard.fxsr_space[0]) & 15) + panic("Kernel compiled for PII/PIII+ with FXSR, data not 16-byte aligned!"); + + if (cpu_has_fxsr) { + printk(KERN_INFO "Enabling fast FPU save and restore... "); + set_in_cr4(X86_CR4_OSFXSR); + printk("done.\n"); + } +#endif +#ifdef CONFIG_X86_XMM + if (cpu_has_xmm) { + printk(KERN_INFO "Enabling unmasked SIMD FPU exception support... "); + set_in_cr4(X86_CR4_OSXMMEXCPT); + printk("done.\n"); + + /* Check if exception 19 works okay. */ + set_fpu_mxcsr(XMM_UNMASKED_MXCSR); + printk(KERN_INFO "Checking SIMD FPU exceptions... 
"); + __asm__("movups %0,%%xmm0\n\t" + "movups %1,%%xmm1\n\t" + "divps %%xmm0,%%xmm1\n\t" + : : "m" (*&zero), "m" (*&one)); + printk("OK, SIMD FPU using exception 19 error reporting.\n"); + set_fpu_mxcsr(XMM_DEFAULT_MXCSR); + } +#endif } static void __init check_hlt(void) @@ -424,6 +463,14 @@ static void __init check_config(void) && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) panic("Kernel compiled for PPro+, assumes a local APIC without the read-before-write bug!"); #endif + +/* + * If we configured ourselves for FXSR, we'd better have it. + */ +#ifdef CONFIG_X86_FXSR + if (!cpu_has_fxsr) + panic("Kernel compiled for PII/PIII+, requires FXSR feature!"); +#endif } static void __init check_bugs(void) diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 729c11e77..76fd66fd8 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -2,6 +2,9 @@ * include/asm-i386/processor.h * * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 */ #ifndef __ASM_I386_PROCESSOR_H @@ -90,17 +93,15 @@ struct cpuinfo_x86 { #define X86_FEATURE_20 0x00100000 #define X86_FEATURE_21 0x00200000 #define X86_FEATURE_22 0x00400000 -#define X86_FEATURE_MMX 0x00800000 /* multimedia extensions */ +#define X86_FEATURE_MMX 0x00800000 /* Multimedia Extensions */ #define X86_FEATURE_FXSR 0x01000000 /* FXSAVE and FXRSTOR instructions (fast save and restore of FPU context), and CR4.OSFXSR (OS uses these instructions) available */ -#define X86_FEATURE_XMM 0x02000000 /* Intel MMX2 instruction set */ +#define X86_FEATURE_XMM 0x02000000 /* Streaming SIMD Extensions */ #define X86_FEATURE_26 0x04000000 #define X86_FEATURE_27 0x08000000 #define X86_FEATURE_28 0x10000000 #define X86_FEATURE_29 0x20000000 #define X86_FEATURE_30 0x40000000 #define X86_FEATURE_AMD3D 0x80000000 -#define X86_CR4_OSFXSR 0x0200 /* fast FPU save/restore */ -#define X86_CR4_OSXMMEXCPT 0x0400 /* KNI 
(MMX2) unmasked exception 16 */ extern struct cpuinfo_x86 boot_cpu_data; extern struct tss_struct init_tss[NR_CPUS]; @@ -125,6 +126,10 @@ extern struct cpuinfo_x86 cpu_data[]; (boot_cpu_data.x86_capability & X86_FEATURE_DE) #define cpu_has_vme \ (boot_cpu_data.x86_capability & X86_FEATURE_VME) +#define cpu_has_fxsr \ + (boot_cpu_data.x86_capability & X86_FEATURE_FXSR) +#define cpu_has_xmm \ + (boot_cpu_data.x86_capability & X86_FEATURE_XMM) extern char ignore_irq13; @@ -142,23 +147,24 @@ extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) "=b" (*ebx), "=c" (*ecx), "=d" (*edx) - : "a" (op) - : "cc"); + : "a" (op)); } /* * Intel CPU features in CR4 */ -#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ -#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ -#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ -#define X86_CR4_DE 0x0008 /* enable debugging extensions */ -#define X86_CR4_PSE 0x0010 /* enable page size extensions */ -#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ -#define X86_CR4_MCE 0x0040 /* Machine check enable */ -#define X86_CR4_PGE 0x0080 /* enable global pages */ -#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ +#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ +#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ +#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ +#define X86_CR4_DE 0x0008 /* enable debugging extensions */ +#define X86_CR4_PSE 0x0010 /* enable page size extensions */ +#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ +#define X86_CR4_MCE 0x0040 /* Machine check enable */ +#define X86_CR4_PGE 0x0080 /* enable global pages */ +#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ +#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ +#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ /* * Save the cr4 feature set we're using (ie @@ -244,21 +250,7 @@ 
extern unsigned int mca_pentium_flag; #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) #define INVALID_IO_BITMAP_OFFSET 0x8000 -#ifndef CONFIG_X86_FX - -#define i387_save_hard(x) \ - __asm__("fnsave %0\n\tfwait": :"m" (x)) -#define i387_restore_hard(x) \ - __asm__("frstor %0": :"m" (x)) - -#define i387_hard_to_user(uaddr, x) \ - __copy_to_user((uaddr), (x), sizeof(struct i387_hard_struct)) -#define i387_user_to_hard(x, uaddr) \ - __copy_from_user((x), (uaddr), sizeof(struct i387_hard_struct)) - -#define i387_set_cwd(x,v) do { (x).cwd = 0xffff0000 | (v); } while (0) -#define i387_set_swd(x,v) do { (x).swd = 0xffff0000 | (v); } while (0) -#define i387_set_twd(x,v) do { (x).twd = 0xffff0000 | (v); } while (0) +#ifndef CONFIG_X86_FXSR struct i387_hard_struct { long cwd; @@ -274,65 +266,29 @@ struct i387_hard_struct { #else -/* - * has to be 128-bit aligned - */ +/* It doesn't matter if the CPU writes only part of this struct; it gets + * copied by do_fork, so the unimplemented area never changes from what + * init_task.i387 is initialized to (all zeroes). 
*/ + struct i387_hard_struct { - unsigned short cwd; - unsigned short swd; - unsigned short twd; - unsigned short fopcode; - unsigned int fip; - unsigned short fcs; - unsigned short __reserved_01; - unsigned int fdp; - unsigned short fds; - unsigned short __reserved_02; - unsigned int mxcsr; - unsigned int __reserved_03; - unsigned int st_space[32]; /* 8*16 bytes for each FP/MMX-reg = 128 bytes */ - unsigned int xmm_space[22*4]; /* 22 cachelines for MMX2 registers */ - unsigned long status; + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long status; /* software status information */ + long fxsr_space[6]; /* FXSR FPU environment must not be used */ + long mxcsr; + long reserved; + long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + long padding[56]; } __attribute__ ((aligned (16))); -/* - * tag word conversion (thanks to Gabriel Paubert for noticing the - * subtle format difference and implementing these functions) - * - * there are several erratas wrt. the tag word in the i387, thus - * any software relying on it's value is questionable, but we - * definitely want to be as close as possible. 
- */ -static inline unsigned short fputag_KNIto387(unsigned char tb) { - unsigned short tw = tb; - tw = ((tw<<4) | tw) &0x0f0f; /* zzzz7654zzzz3210 */ - tw = ((tw<<2) | tw) &0x3333; /* zz76zz54zz32zz10 */ - tw = ((tw<<1) | tw) &0x5555; /* z7z6z5z4z3z2z1z0 */ - return ~(tw*3); -} - -static inline unsigned char fputag_387toKNI(unsigned short tw) { - tw = ~tw; - tw = (tw | (tw>>1)) & 0x5555; /* z7z6z5z4z3z2z1z0 */ - tw = (tw | (tw>>1)) & 0x3333; /* zz76zz54zz32zz10 */ - tw = (tw | (tw>>3)) & 0x0f0f; /* zzzz7654zzzz3210 */ - return (tw|(tw>>4)) & 0x00ff; /* zzzzzzzz76543210 */ -} - -#define i387_set_cwd(x,v) do { (x).cwd = (short)(v); } while (0) -#define i387_set_swd(x,v) do { (x).swd = (short)(v); } while (0) -#define i387_set_twd(x,v) do { (x).twd = fputag_387toKNI(v); } while (0) - -#define i387_save_hard(x) \ - { __asm__ __volatile__(".byte 0x0f, 0xae, 0x06": :"S" (&(x))); } while (0) - -#define i387_restore_hard(x) \ -do { __asm__ __volatile__(".byte 0x0f, 0xae, 0x4f, 0x00": :"D" (&(x))); } while(0) - -extern int i387_hard_to_user ( struct _fpstate * user, - struct i387_hard_struct * hard); -extern int i387_user_to_hard (struct i387_hard_struct * hard, - struct _fpstate * user); +#define X86_FXSR_MAGIC 0x0000 +#define X86_FXSR_SIZE 512 #endif struct i387_soft_struct { @@ -469,12 +425,48 @@ extern void forget_segments(void); /* * FPU lazy state save handling.. 
*/ +#ifndef CONFIG_X86_FXSR + +#define save_fpu(tsk) do { \ + asm volatile("fnsave %0 ; fwait" \ + : "=m" (tsk->thread.i387.hard)); \ + tsk->flags &= ~PF_USEDFPU; \ + stts(); \ +} while (0) + +#define save_init_fpu(tsk) save_fpu(tsk) + +#define restore_fpu(tsk) do { \ + asm volatile("frstor %0" \ + : : "m" (tsk->thread.i387.hard)); \ +} while (0) + +#else /* CONFIG_X86_FXSR */ + #define save_fpu(tsk) do { \ - i387_save_hard(tsk->thread.i387); \ + asm volatile("fnstenv %0 ; fxsave %1 ; fwait" \ + : "=m" (tsk->thread.i387.hard), \ + "=m" (tsk->thread.i387.hard.fxsr_space[0])); \ + tsk->flags &= ~PF_USEDFPU; \ + stts(); \ +} while (0) + +#define save_init_fpu(tsk) do { \ + asm volatile("fnstenv %0 ; fxsave %1 ; fnclex" \ + : "=m" (tsk->thread.i387.hard), \ + "=m" (tsk->thread.i387.hard.fxsr_space[0])); \ tsk->flags &= ~PF_USEDFPU; \ stts(); \ } while (0) +#define restore_fpu(tsk) do { \ + asm volatile("fxrstor %0 ; fldenv %1" \ + : : "m" (tsk->thread.i387.hard.fxsr_space[0]), \ + "m" (tsk->thread.i387.hard)); \ +} while (0) + +#endif /* CONFIG_X86_FXSR */ + #define unlazy_fpu(tsk) do { \ if (tsk->flags & PF_USEDFPU) \ save_fpu(tsk); \ @@ -487,6 +479,18 @@ extern void forget_segments(void); } \ } while (0) +#ifdef CONFIG_X86_XMM +#define XMM_DEFAULT_MXCSR 0x1f80 +#define XMM_UNMASKED_MXCSR 0x0000 + +#define set_fpu_mxcsr(val) do { \ + if (cpu_has_xmm) { \ + unsigned long __mxcsr = ((unsigned long)(val) & 0xffff); \ + asm volatile("ldmxcsr %0" : : "m" (__mxcsr)); \ + } \ +} while (0) +#endif + /* * Return saved PC of a blocked thread. 
*/ @@ -519,4 +523,6 @@ struct microcode { unsigned int bits[500]; }; +#define MICROCODE_IOCFREE _IO('6',0) /* because it is for P6 */ + #endif /* __ASM_I386_PROCESSOR_H */ diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index 985d28829..05189f6f3 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -46,6 +46,8 @@ struct pt_regs { #define PTRACE_SETREGS 13 #define PTRACE_GETFPREGS 14 #define PTRACE_SETFPREGS 15 +#define PTRACE_GETXFPREGS 25 +#define PTRACE_SETXFPREGS 26 #ifdef __KERNEL__ #define user_mode(regs) ((VM_MASK & (regs)->eflags) || (3 & (regs)->xcs)) diff --git a/include/asm-i386/semaphore.h b/include/asm-i386/semaphore.h index 827c53192..9088efe2c 100644 --- a/include/asm-i386/semaphore.h +++ b/include/asm-i386/semaphore.h @@ -3,6 +3,8 @@ #include <linux/linkage.h> +#ifdef __KERNEL__ + /* * SMP- and interrupt-safe semaphores.. * @@ -373,3 +375,4 @@ extern inline void up_write(struct rw_semaphore *sem) } #endif +#endif diff --git a/include/asm-i386/sigcontext.h b/include/asm-i386/sigcontext.h index bb66dbe49..77ef66680 100644 --- a/include/asm-i386/sigcontext.h +++ b/include/asm-i386/sigcontext.h @@ -1,34 +1,55 @@ #ifndef _ASMi386_SIGCONTEXT_H #define _ASMi386_SIGCONTEXT_H -#include <linux/config.h> /* * As documented in the iBCS2 standard.. * - * The first part of "struct _fpstate" is just the - * normal i387 hardware setup, the extra "status" - * word is used to save the coprocessor status word - * before entering the handler. + * The first part of "struct _fpstate" is just the normal i387 + * hardware setup, the extra "status" word is used to save the + * coprocessor status word before entering the handler. + * + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 + * + * The FPU state data structure has had to grow to accomodate the + * extended FPU state required by the Streaming SIMD Extensions. + * There is no documented standard to accomplish this at the moment. 
*/ struct _fpreg { unsigned short significand[4]; unsigned short exponent; }; +struct _fpxreg { + unsigned short significand[4]; + unsigned short exponent; + unsigned short padding[3]; +}; + +struct _xmmreg { + unsigned long element[4]; +}; + struct _fpstate { - unsigned long cw, - sw, - tag, - ipoff, - cssel, - dataoff, - datasel; + /* Regular FPU environment */ + unsigned long cw; + unsigned long sw; + unsigned long tag; + unsigned long ipoff; + unsigned long cssel; + unsigned long dataoff; + unsigned long datasel; struct _fpreg _st[8]; - unsigned long status; -#ifdef CONFIG_X86_FX + unsigned short status; + unsigned short magic; /* 0xffff = regular FPU data only */ + + /* FXSR FPU environment */ + unsigned long _fxsr_env[6]; /* FXSR FPU env is ignored */ unsigned long mxcsr; - unsigned long _xmm[4*22]; -#endif + unsigned long reserved; + struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ + struct _xmmreg _xmm[8]; + unsigned long padding[56]; }; struct sigcontext { diff --git a/include/asm-i386/user.h b/include/asm-i386/user.h index 2d0849738..e034118f3 100644 --- a/include/asm-i386/user.h +++ b/include/asm-i386/user.h @@ -30,17 +30,46 @@ The minimum core file size is 3 pages, or 12288 bytes. */ +/* + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 + * + * Provide support for the GDB 5.0 PTRACE_*XFPREGS requests for interacting + * with the FXSR-format floating point environment. Floating point data + * can be accessed in the regular FSAVE format in the usual manner, and the + * XMM register/control data can be accessed via the new ptrace requests. + * Note that the floating point environment contained in the FXSR format + * is ignored and any changes to these fields will be lost. 
+ */ + struct user_i387_struct { long cwd; long swd; long twd; long fip; long fcs; - long fdp; - long fds; + long foo; + long fos; long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ }; +struct user_xfpregs_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long __reserved_00; + long fxsr_space[6]; /* FXSR FPU environment must not be used */ + long mxcsr; + long __reserved_01; + long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + long __reserved_02[56]; +}; + /* * This is the old layout of "struct pt_regs", and * is still the layout used by user mode (the new |