From a27ad9b5cf49fea1a55141868f4bf02c2d91bfc1 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 26 Jul 2000 21:06:03 +0000 Subject: Optimize andes_clear_page() and andes_copy_page() with prefetch operations. While those routines are now 30% - 60% faster it turned out that their influence on real-world applications and benchmarks is fairly low; I saw an improvement of ~ 3% for lmbench exec sh and even less for other benches. --- arch/mips64/mm/andes.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'arch/mips64/mm') diff --git a/arch/mips64/mm/andes.c b/arch/mips64/mm/andes.c index 3dfabd169..9c6fb5fa4 100644 --- a/arch/mips64/mm/andes.c +++ b/arch/mips64/mm/andes.c @@ -1,5 +1,4 @@ -/* $Id: andes.c,v 1.7 2000/03/13 22:43:25 kanoj Exp $ - * +/* * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. @@ -24,14 +23,18 @@ "nop; nop; nop; nop; nop; nop;\n\t" \ ".set reorder\n\t") -/* R10000 has no Create_Dirty type cacheops. */ +/* + * This version has been tuned on an Origin. For other machines the arguments + * of the pref instruction may have to be tuned differently. + */ static void andes_clear_page(void * page) { __asm__ __volatile__( ".set\tnoreorder\n\t" ".set\tnoat\n\t" "daddiu\t$1,%0,%2\n" - "1:\tsd\t$0,(%0)\n\t" + "1:\tpref 7,512(%0)\n\t" + "sd\t$0,(%0)\n\t" "sd\t$0,8(%0)\n\t" "sd\t$0,16(%0)\n\t" "sd\t$0,24(%0)\n\t" @@ -48,36 +51,40 @@ static void andes_clear_page(void * page) :"$1", "memory"); } +/* R10000 has no Create_Dirty type cacheops. 
*/ static void andes_copy_page(void * to, void * from) { - unsigned long dummy1, dummy2, reg1, reg2; + unsigned long dummy1, dummy2, reg1, reg2, reg3, reg4; __asm__ __volatile__( ".set\tnoreorder\n\t" ".set\tnoat\n\t" - "daddiu\t$1,%0,%6\n" - "1:\tld\t%2,(%1)\n\t" + "daddiu\t$1,%0,%8\n" + "1:\tpref\t0,2*128(%1)\n\t" + "pref\t1,2*128(%0)\n\t" + "ld\t%2,(%1)\n\t" "ld\t%3,8(%1)\n\t" + "ld\t%4,16(%1)\n\t" + "ld\t%5,24(%1)\n\t" "sd\t%2,(%0)\n\t" "sd\t%3,8(%0)\n\t" - "ld\t%2,16(%1)\n\t" - "ld\t%3,24(%1)\n\t" - "sd\t%2,16(%0)\n\t" - "sd\t%3,24(%0)\n\t" + "sd\t%4,16(%0)\n\t" + "sd\t%5,24(%0)\n\t" "daddiu\t%0,64\n\t" "daddiu\t%1,64\n\t" "ld\t%2,-32(%1)\n\t" "ld\t%3,-24(%1)\n\t" + "ld\t%4,-16(%1)\n\t" + "ld\t%5,-8(%1)\n\t" "sd\t%2,-32(%0)\n\t" "sd\t%3,-24(%0)\n\t" - "ld\t%2,-16(%1)\n\t" - "ld\t%3,-8(%1)\n\t" - "sd\t%2,-16(%0)\n\t" + "sd\t%4,-16(%0)\n\t" "bne\t$1,%0,1b\n\t" - " sd\t%3,-8(%0)\n\t" + " sd\t%5,-8(%0)\n\t" ".set\tat\n\t" ".set\treorder" - :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2) + :"=r" (dummy1), "=r" (dummy2), "=&r" (reg1), "=&r" (reg2), + "=&r" (reg3), "=&r" (reg4) :"0" (to), "1" (from), "I" (PAGE_SIZE)); } -- cgit v1.2.3