author		Ralf Baechle <ralf@linux-mips.org>	1999-12-04 03:58:56 +0000
committer	Ralf Baechle <ralf@linux-mips.org>	1999-12-04 03:58:56 +0000
commit		1d67e90f19a7acfd9a05dc59678e7d0c5090bd0d (patch)
tree		357efc7b93f8f5102110d20d293f41360ec212fc /arch/i386/lib
parent		aea27b2e18d69af87e673972246e66657b4fa274 (diff)
Merge with Linux 2.3.21.
Diffstat (limited to 'arch/i386/lib')
-rw-r--r--	arch/i386/lib/Makefile		|   4
-rw-r--r--	arch/i386/lib/mmx.c		| 236
-rw-r--r--	arch/i386/lib/usercopy.c	|  53
3 files changed, 285 insertions, 8 deletions
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
index a6f8dff09..3f7bef4aa 100644
--- a/arch/i386/lib/Makefile
+++ b/arch/i386/lib/Makefile
@@ -9,4 +9,8 @@ L_TARGET = lib.a
 L_OBJS = checksum.o old-checksum.o delay.o \
 	usercopy.o getuser.o putuser.o
 
+ifdef CONFIG_X86_USE_3DNOW
+L_OBJS += mmx.o
+endif
+
 include $(TOPDIR)/Rules.make
diff --git a/arch/i386/lib/mmx.c b/arch/i386/lib/mmx.c
new file mode 100644
index 000000000..5257aeba6
--- /dev/null
+++ b/arch/i386/lib/mmx.c
@@ -0,0 +1,236 @@
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+
+/*
+ *	MMX 3DNow! library helper functions
+ *
+ *	To do:
+ *	We can use MMX just for prefetch in IRQ's. This may be a win.
+ *		(reported so on K6-III)
+ *	We should use a better code neutral filler for the short jump
+ *		leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
+ *	We also want to clobber the filler register so we dont get any
+ *		register forwarding stalls on the filler.
+ *
+ *	Add *user handling. Checksums are not a win with MMX on any CPU
+ *	tested so far for any MMX solution figured.
+ *
+ */
+
+void *_mmx_memcpy(void *to, const void *from, size_t len)
+{
+	void *p=to;
+	int i= len >> 6;	/* len/64 */
+
+	if (!(current->flags & PF_USEDFPU))
+		clts();
+	else
+	{
+		__asm__ __volatile__ ( " fnsave %0; fwait\n"::"m"(current->thread.i387));
+		current->flags &= ~PF_USEDFPU;
+	}
+
+	__asm__ __volatile__ (
+		"1: prefetch (%0)\n"		/* This set is 28 bytes */
+		"   prefetch 64(%0)\n"
+		"   prefetch 128(%0)\n"
+		"   prefetch 192(%0)\n"
+		"   prefetch 256(%0)\n"
+		"2:\n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
+		"   jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4\n"
+		"	.long 1b, 3b\n"
+		".previous"
+		: : "r" (from) );
+
+	for(; i>0; i--)
+	{
+		__asm__ __volatile__ (
+		"1: prefetch 320(%0)\n"
+		"2: movq (%0), %%mm0\n"
+		"   movq 8(%0), %%mm1\n"
+		"   movq 16(%0), %%mm2\n"
+		"   movq 24(%0), %%mm3\n"
+		"   movq %%mm0, (%1)\n"
+		"   movq %%mm1, 8(%1)\n"
+		"   movq %%mm2, 16(%1)\n"
+		"   movq %%mm3, 24(%1)\n"
+		"   movq 32(%0), %%mm0\n"
+		"   movq 40(%0), %%mm1\n"
+		"   movq 48(%0), %%mm2\n"
+		"   movq 56(%0), %%mm3\n"
+		"   movq %%mm0, 32(%1)\n"
+		"   movq %%mm1, 40(%1)\n"
+		"   movq %%mm2, 48(%1)\n"
+		"   movq %%mm3, 56(%1)\n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
+		"   jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4\n"
+		"	.long 1b, 3b\n"
+		".previous"
+		: : "r" (from), "r" (to) : "memory");
+		from+=64;
+		to+=64;
+	}
+	/*
+	 *	Now do the tail of the block
+	 */
+	__memcpy(to, from, len&63);
+	stts();
+	return p;
+}
+
+static void fast_clear_page(long page)
+{
+	int i;
+
+	if (!(current->flags & PF_USEDFPU))
+		clts();
+	else
+	{
+		__asm__ __volatile__ ( " fnsave %0; fwait\n"::"m"(current->thread.i387));
+		current->flags &= ~PF_USEDFPU;
+	}
+
+	__asm__ __volatile__ (
+		"   pxor %%mm0, %%mm0\n" : :
+	);
+
+	for(i=0;i<4096/128;i++)
+	{
+		__asm__ __volatile__ (
+		"   movq %%mm0, (%0)\n"
+		"   movq %%mm0, 8(%0)\n"
+		"   movq %%mm0, 16(%0)\n"
+		"   movq %%mm0, 24(%0)\n"
+		"   movq %%mm0, 32(%0)\n"
+		"   movq %%mm0, 40(%0)\n"
+		"   movq %%mm0, 48(%0)\n"
+		"   movq %%mm0, 56(%0)\n"
+		"   movq %%mm0, 64(%0)\n"
+		"   movq %%mm0, 72(%0)\n"
+		"   movq %%mm0, 80(%0)\n"
+		"   movq %%mm0, 88(%0)\n"
+		"   movq %%mm0, 96(%0)\n"
+		"   movq %%mm0, 104(%0)\n"
+		"   movq %%mm0, 112(%0)\n"
+		"   movq %%mm0, 120(%0)\n"
+		: : "r" (page) : "memory");
+		page+=128;
+	}
+	stts();
+}
+
+static void fast_copy_page(long to, long from)
+{
+	int i;
+
+	if (!(current->flags & PF_USEDFPU))
+		clts();
+	else
+	{
+		__asm__ __volatile__ ( " fnsave %0; fwait\n"::"m"(current->thread.i387));
+		current->flags &= ~PF_USEDFPU;
+	}
+
+	__asm__ __volatile__ (
+		"1: prefetch (%0)\n"
+		"   prefetch 64(%0)\n"
+		"   prefetch 128(%0)\n"
+		"   prefetch 192(%0)\n"
+		"   prefetch 256(%0)\n"
+		"2:\n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
+		"   jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4\n"
+		"	.long 1b, 3b\n"
+		".previous"
+		: : "r" (from) );
+
+	for(i=0; i<4096/64; i++)
+	{
+		__asm__ __volatile__ (
+		"1: prefetch 320(%0)\n"
+		"2: movq (%0), %%mm0\n"
+		"   movq 8(%0), %%mm1\n"
+		"   movq 16(%0), %%mm2\n"
+		"   movq 24(%0), %%mm3\n"
+		"   movq %%mm0, (%1)\n"
+		"   movq %%mm1, 8(%1)\n"
+		"   movq %%mm2, 16(%1)\n"
+		"   movq %%mm3, 24(%1)\n"
+		"   movq 32(%0), %%mm0\n"
+		"   movq 40(%0), %%mm1\n"
+		"   movq 48(%0), %%mm2\n"
+		"   movq 56(%0), %%mm3\n"
+		"   movq %%mm0, 32(%1)\n"
+		"   movq %%mm1, 40(%1)\n"
+		"   movq %%mm2, 48(%1)\n"
+		"   movq %%mm3, 56(%1)\n"
+		".section .fixup, \"ax\"\n"
+		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
+		"   jmp 2b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		"	.align 4\n"
+		"	.long 1b, 3b\n"
+		".previous"
+		: : "r" (from), "r" (to) : "memory");
+		from+=64;
+		to+=64;
+	}
+	stts();
+}
+
+/*
+ *	Favour MMX for page clear and copy.
+ */
+
+static void slow_zero_page(long page)
+{
+	int d0, d1;
+	__asm__ __volatile__( \
+		"cld\n\t" \
+		"rep ; stosl" \
+		: "=&c" (d0), "=&D" (d1)
+		:"a" (0),"1" (page),"0" (1024)
+		:"memory");
+}
+
+void mmx_clear_page(long page)
+{
+	if(in_interrupt())
+		slow_zero_page(page);
+	else
+		fast_clear_page(page);
+}
+
+static void slow_copy_page(long to, long from)
+{
+	int d0, d1, d2;
+	__asm__ __volatile__( \
+		"cld\n\t" \
+		"rep ; movsl" \
+		: "=&c" (d0), "=&D" (d1), "=&S" (d2) \
+		: "0" (1024),"1" ((long) to),"2" ((long) from) \
+		: "memory");
+}
+
+void mmx_copy_page(long to, long from)
+{
+	if(in_interrupt())
+		slow_copy_page(to, from);
+	else
+		fast_copy_page(to, from);
+}
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index f43be511f..21d8fdf62 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -6,6 +6,37 @@
  * Copyright 1997 Linus Torvalds
  */
 #include <asm/uaccess.h>
+#include <asm/mmx.h>
+
+#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+	if (access_ok(VERIFY_WRITE, to, n))
+	{
+		if(n<512)
+			__copy_user(to,from,n);
+		else
+			mmx_copy_user(to,from,n);
+	}
+	return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+	if (access_ok(VERIFY_READ, from, n))
+	{
+		if(n<512)
+			__copy_user_zeroing(to,from,n);
+		else
+			mmx_copy_user_zeroing(to, from, n);
+	}
+	return n;
+}
+
+#else
 
 unsigned long
 __generic_copy_to_user(void *to, const void *from, unsigned long n)
@@ -23,6 +54,7 @@ __generic_copy_from_user(void *to, const void *from, unsigned long n)
 	return n;
 }
 
+#endif
 
 /*
  * Copy a null terminated string from userspace.
@@ -117,26 +149,31 @@ __clear_user(void *to, unsigned long n)
 
 /*
  * Return the size of a string (including the ending 0)
- *
- * Return 0 for error
+ *
+ * Return 0 on exception, a value greater than N if too long
  */
-long strlen_user(const char *s)
+long strnlen_user(const char *s, long n)
 {
-	unsigned long res;
+	unsigned long mask = -__addr_ok(s);
+	unsigned long res, tmp;
 
 	__asm__ __volatile__(
+		"	andl %0,%%ecx\n"
 		"0:	repne; scasb\n"
-		"	notl %0\n"
+		"	setne %%al\n"
+		"	subl %%ecx,%0\n"
+		"	addl %0,%%eax\n"
 		"1:\n"
 		".section .fixup,\"ax\"\n"
-		"2:	xorl %0,%0\n"
+		"2:	xorl %%eax,%%eax\n"
 		"	jmp 1b\n"
 		".previous\n"
 		".section __ex_table,\"a\"\n"
 		"	.align 4\n"
 		"	.long 0b,2b\n"
 		".previous"
-		:"=c" (res), "=D" (s)
-		:"1" (s), "a" (0), "0" (-__addr_ok(s)));
-	return res & -__addr_ok(s);
+		:"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
+		:"0" (n), "1" (s), "2" (0), "3" (mask)
+		:"cc");
+	return res & mask;
 }
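
A note on the "movw $0x1AEB, 1b" fixup in mmx.c: prefetch is a 3DNow! instruction, so on a CPU where it faults, the exception table transfers control to label 3, which overwrites the start of the prefetch block (label 1) with the bytes EB 1A, a two-byte short jmp whose +26 displacement skips the rest of the 28-byte prefetch run (the inner-loop variant uses EB 05 to skip its single 7-byte prefetch). Subsequent passes through the code then bypass the prefetches entirely. The following standalone C snippet is an illustration written for this note, not part of the commit; it only shows how the 16-bit immediate decodes:

	#include <stdio.h>

	int main(void)
	{
		unsigned short patch = 0x1AEB;	/* the word stored by "movw $0x1AEB, 1b" */
		unsigned char *b = (unsigned char *) &patch;

		/* x86 is little-endian, so memory holds EB 1A:
		 * EB = short "jmp rel8" opcode, 1A = +26 byte displacement,
		 * i.e. the remaining 26 bytes of the 28-byte prefetch set.
		 */
		printf("patched bytes: %02X %02X\n", b[0], b[1]);	/* EB 1A */
		return 0;
	}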
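The clts()/fnsave/stts() sequence repeated in each fast_* routine exists because the MMX registers alias the x87 FPU stack: before clobbering %mm0-%mm3 the kernel must either claim a clean FPU (clts) or spill the user's live state (fnsave), and stts() afterwards sets CR0.TS again so the next user FPU instruction traps and the user's state is restored. A simplified sketch of that protocol follows; the helper names kernel_mmx_begin()/kernel_mmx_end() are hypothetical, since the commit open-codes the sequence in each function:

	static void kernel_mmx_begin(void)
	{
		if (!(current->flags & PF_USEDFPU))
			clts();		/* clear CR0.TS so MMX use cannot trap */
		else {
			/* The user's x87/MMX state is live in the FPU:
			 * dump it into the task struct before touching
			 * any %mm register.
			 */
			__asm__ __volatile__("fnsave %0; fwait"
					     : "=m" (current->thread.i387));
			current->flags &= ~PF_USEDFPU;
		}
	}

	static void kernel_mmx_end(void)
	{
		/* Set CR0.TS again: the next user FPU/MMX instruction
		 * faults, and the trap handler reloads the saved state.
		 */
		stts();
	}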
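The strlen_user to strnlen_user change in usercopy.c also changes the return convention: 0 now means an exception was taken while scanning, and a result greater than the limit means no terminating NUL was found within it. A hypothetical caller might check the result like this (check_user_name, PATH_MAX and the error codes are illustrative, not from this commit):

	static long check_user_name(const char *uname)
	{
		long len = strnlen_user(uname, PATH_MAX);

		if (!len)		/* fault while scanning user memory */
			return -EFAULT;
		if (len > PATH_MAX)	/* no NUL within PATH_MAX bytes */
			return -ENAMETOOLONG;
		return len;		/* length including the trailing NUL */
	}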