summaryrefslogtreecommitdiffstats
path: root/arch/mips64/mm/andes.c
diff options
context:
space:
mode:
author Ralf Baechle <ralf@linux-mips.org> 2000-07-26 21:06:03 +0000
committer Ralf Baechle <ralf@linux-mips.org> 2000-07-26 21:06:03 +0000
commit a27ad9b5cf49fea1a55141868f4bf02c2d91bfc1 (patch)
tree 06f13f2b04c9d3657113087f3bd282195353ae52 /arch/mips64/mm/andes.c
parent 9f1a49a1553d9614c4d3a581efe8c98c62ebb55b (diff)
Optimize andes_clear_page() and andes_copy_page() with prefetch
operations. While those routines are now 30% - 60% faster, it turned out that their influence on real-world applications and benchmarks is fairly low; I saw an improvement of ~ 3% for lmbench exec sh and even less for other benches.
Diffstat (limited to 'arch/mips64/mm/andes.c')
-rw-r--r--arch/mips64/mm/andes.c39
1 files changed, 23 insertions, 16 deletions
diff --git a/arch/mips64/mm/andes.c b/arch/mips64/mm/andes.c
index 3dfabd169..9c6fb5fa4 100644
--- a/arch/mips64/mm/andes.c
+++ b/arch/mips64/mm/andes.c
@@ -1,5 +1,4 @@
-/* $Id: andes.c,v 1.7 2000/03/13 22:43:25 kanoj Exp $
- *
+/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
@@ -24,14 +23,18 @@
"nop; nop; nop; nop; nop; nop;\n\t" \
".set reorder\n\t")
-/* R10000 has no Create_Dirty type cacheops. */
+/*
+ * This version has been tuned on an Origin. For other machines the arguments
+ * of the pref instruction may have to be tuned differently.
+ */
static void andes_clear_page(void * page)
{
__asm__ __volatile__(
".set\tnoreorder\n\t"
".set\tnoat\n\t"
"daddiu\t$1,%0,%2\n"
- "1:\tsd\t$0,(%0)\n\t"
+ "1:\tpref 7,512(%0)\n\t"
+ "sd\t$0,(%0)\n\t"
"sd\t$0,8(%0)\n\t"
"sd\t$0,16(%0)\n\t"
"sd\t$0,24(%0)\n\t"
@@ -48,36 +51,40 @@ static void andes_clear_page(void * page)
:"$1", "memory");
}
+/* R10000 has no Create_Dirty type cacheops. */
static void andes_copy_page(void * to, void * from)
{
- unsigned long dummy1, dummy2, reg1, reg2;
+ unsigned long dummy1, dummy2, reg1, reg2, reg3, reg4;
__asm__ __volatile__(
".set\tnoreorder\n\t"
".set\tnoat\n\t"
- "daddiu\t$1,%0,%6\n"
- "1:\tld\t%2,(%1)\n\t"
+ "daddiu\t$1,%0,%8\n"
+ "1:\tpref\t0,2*128(%1)\n\t"
+ "pref\t1,2*128(%0)\n\t"
+ "ld\t%2,(%1)\n\t"
"ld\t%3,8(%1)\n\t"
+ "ld\t%4,16(%1)\n\t"
+ "ld\t%5,24(%1)\n\t"
"sd\t%2,(%0)\n\t"
"sd\t%3,8(%0)\n\t"
- "ld\t%2,16(%1)\n\t"
- "ld\t%3,24(%1)\n\t"
- "sd\t%2,16(%0)\n\t"
- "sd\t%3,24(%0)\n\t"
+ "sd\t%4,16(%0)\n\t"
+ "sd\t%5,24(%0)\n\t"
"daddiu\t%0,64\n\t"
"daddiu\t%1,64\n\t"
"ld\t%2,-32(%1)\n\t"
"ld\t%3,-24(%1)\n\t"
+ "ld\t%4,-16(%1)\n\t"
+ "ld\t%5,-8(%1)\n\t"
"sd\t%2,-32(%0)\n\t"
"sd\t%3,-24(%0)\n\t"
- "ld\t%2,-16(%1)\n\t"
- "ld\t%3,-8(%1)\n\t"
- "sd\t%2,-16(%0)\n\t"
+ "sd\t%4,-16(%0)\n\t"
"bne\t$1,%0,1b\n\t"
- " sd\t%3,-8(%0)\n\t"
+ " sd\t%5,-8(%0)\n\t"
".set\tat\n\t"
".set\treorder"
- :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2)
+ :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2),
+ "=&r" (reg3), "=&r" (reg4)
:"0" (to), "1" (from), "I" (PAGE_SIZE));
}