diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1998-08-25 09:12:35 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 1998-08-25 09:12:35 +0000 |
commit | c7fc24dc4420057f103afe8fc64524ebc25c5d37 (patch) | |
tree | 3682407a599b8f9f03fc096298134cafba1c9b2f /arch/sparc64/lib/memscan.S | |
parent | 1d793fade8b063fde3cf275bf1a5c2d381292cd9 (diff) |
o Merge with Linux 2.1.116.
o New Newport console code.
o New G364 console code.
Diffstat (limited to 'arch/sparc64/lib/memscan.S')
-rw-r--r-- | arch/sparc64/lib/memscan.S | 203 |
1 files changed, 108 insertions, 95 deletions
diff --git a/arch/sparc64/lib/memscan.S b/arch/sparc64/lib/memscan.S index 83abe4040..423bc1409 100644 --- a/arch/sparc64/lib/memscan.S +++ b/arch/sparc64/lib/memscan.S @@ -1,116 +1,129 @@ -/* $Id: memscan.S,v 1.1 1997/03/14 21:04:24 jj Exp $ - * memscan.S: Optimized memscan for the Sparc64. +/* $Id: memscan.S,v 1.2 1998/05/21 14:42:22 jj Exp $ + * memscan.S: Optimized memscan for Sparc64. * - * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 David S. Miller (davem@dm.cobaltmicro.com) */ -/* In essence, this is just a fancy strlen. */ - -#define LO_MAGIC 0x01010101 -#define HI_MAGIC 0x80808080 +#define HI_MAGIC 0x8080808080808080 +#define LO_MAGIC 0x0101010101010101 +#define ASI_PL 0x88 .text - .align 4 - .globl __memscan_zero, __memscan_generic - .globl memscan + .align 32 + .globl __memscan_zero, __memscan_generic + .globl memscan + __memscan_zero: - /* %o0 = addr, %o1 = size */ - brlez,pn %o1, 0f - andcc %o0, 3, %g0 - be,pt %icc, 9f - sethi %hi(HI_MAGIC), %o4 - ldub [%o0], %o5 - subcc %o1, 1, %o1 - brz,pn %o5, 10f - add %o0, 1, %o0 - be,pn %xcc, 0f - andcc %o0, 3, %g0 - be,pn %icc, 4f - or %o4, %lo(HI_MAGIC), %o3 - ldub [%o0], %o5 - subcc %o1, 1, %o1 - brz,pn %o5, 10f - add %o0, 1, %o0 - be,pn %xcc, 0f - andcc %o0, 3, %g0 - be,pt %icc, 5f - sethi %hi(LO_MAGIC), %o4 - ldub [%o0], %o5 - subcc %o1, 1, %o1 - brz,pn %o5, 10f - add %o0, 1, %o0 - be,pn %xcc, 0f - or %o4, %lo(LO_MAGIC), %o2 - ba,pt %xcc, 2f - ld [%o0], %o5 -9: - or %o4, %lo(HI_MAGIC), %o3 -4: - sethi %hi(LO_MAGIC), %o4 -5: - or %o4, %lo(LO_MAGIC), %o2 - ld [%o0], %o5 -2: - sub %o5, %o2, %o4 - sub %o1, 4, %o1 - andcc %o4, %o3, %g0 - be,pn %icc, 1f - add %o0, 4, %o0 - brgz,pt %o1, 2b - ld [%o0], %o5 + /* %o0 = bufp, %o1 = size */ + brlez,pn %o1, szzero + andcc %o0, 7, %g0 + be,pt %icc, we_are_aligned + sethi %hi(HI_MAGIC), %o4 + ldub [%o0], %o5 +1: subcc %o1, 1, %o1 + brz,pn %o5, 10f + add %o0, 1, %o0 + be,pn %xcc, szzero + andcc %o0, 7, %g0 + bne,a,pn %icc, 1b + ldub [%o0], %o5 +we_are_aligned: + ldxa [%o0] ASI_PL, %o5 + or %o4, %lo(HI_MAGIC), %o3 + sllx %o3, 32, %o4 + or %o4, %o3, %o3 + + srlx %o3, 7, %o2 +msloop: + sub %o1, 8, %o1 + add %o0, 8, %o0 + sub %o5, %o2, %o4 + xor %o4, %o5, %o4 + andcc %o4, %o3, %g3 + bne,pn %xcc, check_bytes + srlx %o4, 32, %g3 + + brgz,a,pt %o1, msloop + ldxa [%o0] ASI_PL, %o5 +check_bytes: + bne,a,pn %icc, 2f + andcc %o5, 0xff, %g0 + add %o0, -5, %g2 + ba,pt %xcc, 3f + srlx %o5, 32, %g5 + +2: srlx %o5, 8, %g5 + be,pn %icc, 1f + add %o0, -8, %g2 + andcc %g5, 0xff, %g0 + srlx %g5, 8, %g5 + be,pn %icc, 1f + inc %g2 + andcc %g5, 0xff, %g0 + + srlx %g5, 8, %g5 + be,pn %icc, 1f + inc %g2 + andcc %g5, 0xff, %g0 + srlx %g5, 8, %g5 + be,pn %icc, 1f + inc %g2 + andcc %g3, %o3, %g0 + + be,a,pn %icc, 2f + mov %o0, %g2 +3: andcc %g5, 0xff, %g0 + srlx %g5, 8, %g5 + be,pn %icc, 1f + inc %g2 + andcc %g5, 0xff, %g0 + srlx %g5, 8, %g5 + + be,pn %icc, 1f + inc %g2 + andcc %g5, 0xff, %g0 + srlx %g5, 8, %g5 + be,pn %icc, 1f + inc %g2 + andcc %g5, 0xff, %g0 + srlx %g5, 8, %g5 + + be,pn %icc, 1f + inc %g2 +2: brgz,a,pt %o1, msloop + ldxa [%o0] ASI_PL, %o5 + inc %g2 +1: add %o0, %o1, %o0 + cmp %g2, %o0 retl - add %o0, %o1, %o0 -1: - /* Check every byte. */ - srl %o5, 24, %g5 - andcc %g5, 0xff, %g0 - be,pn %icc, 1f - add %o0, -4, %o4 - srl %o5, 16, %g5 - andcc %g5, 0xff, %g0 - be,pn %icc, 1f - add %o4, 1, %o4 - srl %o5, 8, %g5 - andcc %g5, 0xff, %g0 - be,pn %icc, 1f - add %o4, 1, %o4 - andcc %o5, 0xff, %g0 - be,pn %icc, 1f - add %o4, 1, %o4 - brgz,pt %o1, 2b - ld [%o0], %o5 -1: - add %o0, %o1, %o0 - cmp %o4, %o0 - retl - movle %xcc, %o4, %o0 -0: - retl + + movle %xcc, %g2, %o0 +10: retl + sub %o0, 1, %o0 +szzero: retl nop -10: - retl - sub %o0, 1, %o0 memscan: __memscan_generic: /* %o0 = addr, %o1 = c, %o2 = size */ - brz,pn %o2, 3f - add %o0, %o2, %o3 - ldub [%o0], %o5 - sub %g0, %o2, %o4 + brz,pn %o2, 3f + add %o0, %o2, %o3 + ldub [%o0], %o5 + sub %g0, %o2, %o4 1: - cmp %o5, %o1 - be,pn %icc, 2f - addcc %o4, 1, %o4 - bne,a,pt %xcc, 1b - ldub [%o3 + %o4], %o5 + cmp %o5, %o1 + be,pn %icc, 2f + addcc %o4, 1, %o4 + bne,a,pt %xcc, 1b + ldub [%o3 + %o4], %o5 retl /* The delay slot is the same as the next insn, this is just to make it look more awful */ 2: - add %o3, %o4, %o0 + add %o3, %o4, %o0 retl - sub %o0, 1, %o0 + sub %o0, 1, %o0 3: retl nop |