diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-07-26 21:06:03 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-07-26 21:06:03 +0000 |
commit | a27ad9b5cf49fea1a55141868f4bf02c2d91bfc1 (patch) | |
tree | 06f13f2b04c9d3657113087f3bd282195353ae52 /arch/mips64/mm/andes.c | |
parent | 9f1a49a1553d9614c4d3a581efe8c98c62ebb55b (diff) |
Optimize andes_clear_page() and andes_copy_page() with prefetch
operations. While those routines are now 30% - 60% faster, it turned
out that their influence on real-world applications and benchmarks is
fairly low; I saw an improvement of ~ 3% for lmbench exec sh and even
less for other benches.
Diffstat (limited to 'arch/mips64/mm/andes.c')
-rw-r--r-- | arch/mips64/mm/andes.c | 39 |
1 files changed, 23 insertions, 16 deletions
diff --git a/arch/mips64/mm/andes.c b/arch/mips64/mm/andes.c index 3dfabd169..9c6fb5fa4 100644 --- a/arch/mips64/mm/andes.c +++ b/arch/mips64/mm/andes.c @@ -1,5 +1,4 @@ -/* $Id: andes.c,v 1.7 2000/03/13 22:43:25 kanoj Exp $ - * +/* * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. @@ -24,14 +23,18 @@ "nop; nop; nop; nop; nop; nop;\n\t" \ ".set reorder\n\t") -/* R10000 has no Create_Dirty type cacheops. */ +/* + * This version has been tuned on an Origin. For other machines the arguments + * of the pref instructin may have to be tuned differently. + */ static void andes_clear_page(void * page) { __asm__ __volatile__( ".set\tnoreorder\n\t" ".set\tnoat\n\t" "daddiu\t$1,%0,%2\n" - "1:\tsd\t$0,(%0)\n\t" + "1:\tpref 7,512(%0)\n\t" + "sd\t$0,(%0)\n\t" "sd\t$0,8(%0)\n\t" "sd\t$0,16(%0)\n\t" "sd\t$0,24(%0)\n\t" @@ -48,36 +51,40 @@ static void andes_clear_page(void * page) :"$1", "memory"); } +/* R10000 has no Create_Dirty type cacheops. 
*/ static void andes_copy_page(void * to, void * from) { - unsigned long dummy1, dummy2, reg1, reg2; + unsigned long dummy1, dummy2, reg1, reg2, reg3, reg4; __asm__ __volatile__( ".set\tnoreorder\n\t" ".set\tnoat\n\t" - "daddiu\t$1,%0,%6\n" - "1:\tld\t%2,(%1)\n\t" + "daddiu\t$1,%0,%8\n" + "1:\tpref\t0,2*128(%1)\n\t" + "pref\t1,2*128(%0)\n\t" + "ld\t%2,(%1)\n\t" "ld\t%3,8(%1)\n\t" + "ld\t%4,16(%1)\n\t" + "ld\t%5,24(%1)\n\t" "sd\t%2,(%0)\n\t" "sd\t%3,8(%0)\n\t" - "ld\t%2,16(%1)\n\t" - "ld\t%3,24(%1)\n\t" - "sd\t%2,16(%0)\n\t" - "sd\t%3,24(%0)\n\t" + "sd\t%4,16(%0)\n\t" + "sd\t%5,24(%0)\n\t" "daddiu\t%0,64\n\t" "daddiu\t%1,64\n\t" "ld\t%2,-32(%1)\n\t" "ld\t%3,-24(%1)\n\t" + "ld\t%4,-16(%1)\n\t" + "ld\t%5,-8(%1)\n\t" "sd\t%2,-32(%0)\n\t" "sd\t%3,-24(%0)\n\t" - "ld\t%2,-16(%1)\n\t" - "ld\t%3,-8(%1)\n\t" - "sd\t%2,-16(%0)\n\t" + "sd\t%4,-16(%0)\n\t" "bne\t$1,%0,1b\n\t" - " sd\t%3,-8(%0)\n\t" + " sd\t%5,-8(%0)\n\t" ".set\tat\n\t" ".set\treorder" - :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2) + :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2), + "=&r" (reg3), "=&r" (reg4) :"0" (to), "1" (from), "I" (PAGE_SIZE)); } |