author    Ralf Baechle <ralf@linux-mips.org>    1999-12-04 03:58:56 +0000
committer Ralf Baechle <ralf@linux-mips.org>    1999-12-04 03:58:56 +0000
commit    1d67e90f19a7acfd9a05dc59678e7d0c5090bd0d (patch)
tree      357efc7b93f8f5102110d20d293f41360ec212fc /arch/i386/lib
parent    aea27b2e18d69af87e673972246e66657b4fa274 (diff)
Merge with Linux 2.3.21.
Diffstat (limited to 'arch/i386/lib')
-rw-r--r--  arch/i386/lib/Makefile      4
-rw-r--r--  arch/i386/lib/mmx.c       236
-rw-r--r--  arch/i386/lib/usercopy.c   53
3 files changed, 285 insertions(+), 8 deletions(-)
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
index a6f8dff09..3f7bef4aa 100644
--- a/arch/i386/lib/Makefile
+++ b/arch/i386/lib/Makefile
@@ -9,4 +9,8 @@ L_TARGET = lib.a
L_OBJS = checksum.o old-checksum.o delay.o \
usercopy.o getuser.o putuser.o
+ifdef CONFIG_X86_USE_3DNOW
+L_OBJS += mmx.o
+endif
+
include $(TOPDIR)/Rules.make
diff --git a/arch/i386/lib/mmx.c b/arch/i386/lib/mmx.c
new file mode 100644
index 000000000..5257aeba6
--- /dev/null
+++ b/arch/i386/lib/mmx.c
@@ -0,0 +1,236 @@
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+
+/*
+ * MMX 3DNow! library helper functions
+ *
+ * To do:
+ * We can use MMX just for prefetch in IRQs. This may be a win.
+ * (reported so on K6-III)
+ * We should use a better code neutral filler for the short jump
+ * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
+ * We also want to clobber the filler register so we don't get any
+ * register forwarding stalls on the filler.
+ *
+ * Add *user handling. Checksums are not a win with MMX on any CPU
+ * tested so far for any MMX solution figured.
+ *
+ */
+
+void *_mmx_memcpy(void *to, const void *from, size_t len)
+{
+ void *p=to;
+ int i= len >> 6; /* len/64 */
+
+ if (!(current->flags & PF_USEDFPU))
+ clts();
+ else
+ {
+ __asm__ __volatile__ ( " fnsave %0; fwait\n"::"m"(current->thread.i387));
+ current->flags &= ~PF_USEDFPU;
+ }
+
+ __asm__ __volatile__ (
+ "1: prefetch (%0)\n" /* This set is 28 bytes */
+ " prefetch 64(%0)\n"
+ " prefetch 128(%0)\n"
+ " prefetch 192(%0)\n"
+ " prefetch 256(%0)\n"
+ "2: \n"
+ ".section .fixup, \"ax\"\n"
+ "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
+ " jmp 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b, 3b\n"
+ ".previous"
+ : : "r" (from) );
+
+
+ for(; i>0; i--)
+ {
+ __asm__ __volatile__ (
+ "1: prefetch 320(%0)\n"
+ "2: movq (%0), %%mm0\n"
+ " movq 8(%0), %%mm1\n"
+ " movq 16(%0), %%mm2\n"
+ " movq 24(%0), %%mm3\n"
+ " movq %%mm0, (%1)\n"
+ " movq %%mm1, 8(%1)\n"
+ " movq %%mm2, 16(%1)\n"
+ " movq %%mm3, 24(%1)\n"
+ " movq 32(%0), %%mm0\n"
+ " movq 40(%0), %%mm1\n"
+ " movq 48(%0), %%mm2\n"
+ " movq 56(%0), %%mm3\n"
+ " movq %%mm0, 32(%1)\n"
+ " movq %%mm1, 40(%1)\n"
+ " movq %%mm2, 48(%1)\n"
+ " movq %%mm3, 56(%1)\n"
+ ".section .fixup, \"ax\"\n"
+ "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
+ " jmp 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b, 3b\n"
+ ".previous"
+ : : "r" (from), "r" (to) : "memory");
+ from+=64;
+ to+=64;
+ }
+ /*
+ * Now do the tail of the block
+ */
+ __memcpy(to, from, len&63);
+ stts();
+ return p;
+}
+
+static void fast_clear_page(long page)
+{
+ int i;
+ if (!(current->flags & PF_USEDFPU))
+ clts();
+ else
+ {
+ __asm__ __volatile__ ( " fnsave %0; fwait\n"::"m"(current->thread.i387));
+ current->flags &= ~PF_USEDFPU;
+ }
+
+ __asm__ __volatile__ (
+ " pxor %%mm0, %%mm0\n" : :
+ );
+
+ for(i=0;i<4096/128;i++)
+ {
+ __asm__ __volatile__ (
+ " movq %%mm0, (%0)\n"
+ " movq %%mm0, 8(%0)\n"
+ " movq %%mm0, 16(%0)\n"
+ " movq %%mm0, 24(%0)\n"
+ " movq %%mm0, 32(%0)\n"
+ " movq %%mm0, 40(%0)\n"
+ " movq %%mm0, 48(%0)\n"
+ " movq %%mm0, 56(%0)\n"
+ " movq %%mm0, 64(%0)\n"
+ " movq %%mm0, 72(%0)\n"
+ " movq %%mm0, 80(%0)\n"
+ " movq %%mm0, 88(%0)\n"
+ " movq %%mm0, 96(%0)\n"
+ " movq %%mm0, 104(%0)\n"
+ " movq %%mm0, 112(%0)\n"
+ " movq %%mm0, 120(%0)\n"
+ : : "r" (page) : "memory");
+ page+=128;
+ }
+ stts();
+}
+
+static void fast_copy_page(long to, long from)
+{
+ int i;
+ if (!(current->flags & PF_USEDFPU))
+ clts();
+ else
+ {
+ __asm__ __volatile__ ( " fnsave %0; fwait\n"::"m"(current->thread.i387));
+ current->flags &= ~PF_USEDFPU;
+ }
+
+ __asm__ __volatile__ (
+ "1: prefetch (%0)\n"
+ " prefetch 64(%0)\n"
+ " prefetch 128(%0)\n"
+ " prefetch 192(%0)\n"
+ " prefetch 256(%0)\n"
+ "2: \n"
+ ".section .fixup, \"ax\"\n"
+ "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
+ " jmp 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b, 3b\n"
+ ".previous"
+ : : "r" (from) );
+
+ for(i=0; i<4096/64; i++)
+ {
+ __asm__ __volatile__ (
+ "1: prefetch 320(%0)\n"
+ "2: movq (%0), %%mm0\n"
+ " movq 8(%0), %%mm1\n"
+ " movq 16(%0), %%mm2\n"
+ " movq 24(%0), %%mm3\n"
+ " movq %%mm0, (%1)\n"
+ " movq %%mm1, 8(%1)\n"
+ " movq %%mm2, 16(%1)\n"
+ " movq %%mm3, 24(%1)\n"
+ " movq 32(%0), %%mm0\n"
+ " movq 40(%0), %%mm1\n"
+ " movq 48(%0), %%mm2\n"
+ " movq 56(%0), %%mm3\n"
+ " movq %%mm0, 32(%1)\n"
+ " movq %%mm1, 40(%1)\n"
+ " movq %%mm2, 48(%1)\n"
+ " movq %%mm3, 56(%1)\n"
+ ".section .fixup, \"ax\"\n"
+ "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
+ " jmp 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b, 3b\n"
+ ".previous"
+ : : "r" (from), "r" (to) : "memory");
+ from+=64;
+ to+=64;
+ }
+ stts();
+}
+
+/*
+ * Favour MMX for page clear and copy.
+ */
+
+static void slow_zero_page(long page)
+{
+ int d0, d1;
+ __asm__ __volatile__( \
+ "cld\n\t" \
+ "rep ; stosl" \
+ : "=&c" (d0), "=&D" (d1)
+ :"a" (0),"1" (page),"0" (1024)
+ :"memory");
+}
+
+void mmx_clear_page(long page)
+{
+ if(in_interrupt())
+ slow_zero_page(page);
+ else
+ fast_clear_page(page);
+}
+
+static void slow_copy_page(long to, long from)
+{
+ int d0, d1, d2;
+ __asm__ __volatile__( \
+ "cld\n\t" \
+ "rep ; movsl" \
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
+ : "0" (1024),"1" ((long) to),"2" ((long) from) \
+ : "memory");
+}
+
+
+void mmx_copy_page(long to, long from)
+{
+ if(in_interrupt())
+ slow_copy_page(to, from);
+ else
+ fast_copy_page(to, from);
+}
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index f43be511f..21d8fdf62 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -6,6 +6,37 @@
* Copyright 1997 Linus Torvalds
*/
#include <asm/uaccess.h>
+#include <asm/mmx.h>
+
+#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+ if (access_ok(VERIFY_WRITE, to, n))
+ {
+ if(n<512)
+ __copy_user(to,from,n);
+ else
+ mmx_copy_user(to,from,n);
+ }
+ return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+ if (access_ok(VERIFY_READ, from, n))
+ {
+ if(n<512)
+ __copy_user_zeroing(to,from,n);
+ else
+ mmx_copy_user_zeroing(to, from, n);
+ }
+ return n;
+}
+
+#else
unsigned long
__generic_copy_to_user(void *to, const void *from, unsigned long n)
@@ -23,6 +54,7 @@ __generic_copy_from_user(void *to, const void *from, unsigned long n)
return n;
}
+#endif
/*
* Copy a null terminated string from userspace.
@@ -117,26 +149,31 @@ __clear_user(void *to, unsigned long n)
/*
* Return the size of a string (including the ending 0)
*
- * Return 0 for error
+ * Return 0 on exception, a value greater than N if too long
*/
-long strlen_user(const char *s)
+long strnlen_user(const char *s, long n)
{
- unsigned long res;
+ unsigned long mask = -__addr_ok(s);
+ unsigned long res, tmp;
__asm__ __volatile__(
+ " andl %0,%%ecx\n"
"0: repne; scasb\n"
- " notl %0\n"
+ " setne %%al\n"
+ " subl %%ecx,%0\n"
+ " addl %0,%%eax\n"
"1:\n"
".section .fixup,\"ax\"\n"
- "2: xorl %0,%0\n"
+ "2: xorl %%eax,%%eax\n"
" jmp 1b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 0b,2b\n"
".previous"
- :"=c" (res), "=D" (s)
- :"1" (s), "a" (0), "0" (-__addr_ok(s)));
- return res & -__addr_ok(s);
+ :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
+ :"0" (n), "1" (s), "2" (0), "3" (mask)
+ :"cc");
+ return res & mask;
}