author    Ralf Baechle <ralf@linux-mips.org>  1997-07-20 14:56:40 +0000
committer Ralf Baechle <ralf@linux-mips.org>  1997-07-20 14:56:40 +0000
commit    e308faf24f68e262d92d294a01ddca7a17e76762 (patch)
tree      22c47cb315811834861f013067878ff664e95abd  /arch/sparc64/lib/VISbzero.S
parent    30c6397ce63178fcb3e7963ac247f0a03132aca9 (diff)
Sync with Linux 2.1.46.
Diffstat (limited to 'arch/sparc64/lib/VISbzero.S')
-rw-r--r--  arch/sparc64/lib/VISbzero.S  246
1 file changed, 246 insertions(+), 0 deletions(-)
diff --git a/arch/sparc64/lib/VISbzero.S b/arch/sparc64/lib/VISbzero.S
new file mode 100644
index 000000000..3c86861fd
--- /dev/null
+++ b/arch/sparc64/lib/VISbzero.S
@@ -0,0 +1,246 @@
+/* $Id: VISbzero.S,v 1.1 1997/07/18 06:26:48 ralf Exp $
+ * VISbzero.S: High speed clear operations utilizing the UltraSparc
+ * Visual Instruction Set.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include "VIS.h"
+
+#ifdef __KERNEL__
+#define EXN(x,y,a,b,z) \
+98: x,y; \
+ .section .fixup; \
+ .align 4; \
+99: ba VISbzerofixup_ret##z; \
+ a, b, %o0; \
+ .section __ex_table; \
+ .align 8; \
+ .xword 98b, 99b; \
+ .text; \
+ .align 4;
+#define EXC(x,y,a,b,c...) \
+98: x,y; \
+ .section .fixup; \
+ .align 4; \
+99: c; \
+ ba VISbzerofixup_ret0; \
+ a, b, %o0; \
+ .section __ex_table; \
+ .align 8; \
+ .xword 98b, 99b; \
+ .text; \
+ .align 4;
+#define EXO1(x,y) \
+98: x,y; \
+ .section __ex_table; \
+ .align 8; \
+ .xword 98b, VISbzerofixup_reto1; \
+ .text; \
+ .align 4;
+#define EX(x,y,a,b) EXN(x,y,a,b,0)
+#define EX1(x,y,a,b) EXN(x,y,a,b,1)
+#define EX2(x,y,a,b) EXN(x,y,a,b,2)
+#define EXT(start,end,handler) \
+ .section __ex_table; \
+ .align 8; \
+ .xword start, 0, end, handler; \
+ .text; \
+ .align 4
+#else
+#define EX(x,y,a,b) x,y
+#define EX1(x,y,a,b) x,y
+#define EX2(x,y,a,b) x,y
+#define EXC(x,y,a,b,c...) x,y
+#define EXO1(x,y) x,y
+#define EXT(a,b,c)
+#endif
+
+#define ZERO_BLOCKS(base, offset, source) \
+ STX source, [base - offset - 0x38] ASINORMAL; \
+ STX source, [base - offset - 0x30] ASINORMAL; \
+ STX source, [base - offset - 0x28] ASINORMAL; \
+ STX source, [base - offset - 0x20] ASINORMAL; \
+ STX source, [base - offset - 0x18] ASINORMAL; \
+ STX source, [base - offset - 0x10] ASINORMAL; \
+ STX source, [base - offset - 0x08] ASINORMAL; \
+ STX source, [base - offset - 0x00] ASINORMAL;
+
+#ifdef __KERNEL__
+#define RETL clr %o0
+#else
+#define RETL mov %g3, %o0
+#endif
+
+ /* Well, bzero is a lot easier to get right than bcopy... */
+#ifdef __KERNEL__
+ .section __ex_table,#alloc
+ .section .fixup,#alloc,#execinstr
+#endif
+ .text
+ .align 32
+#ifdef __KERNEL__
+ .globl __bzero, __bzero_noasi
+__bzero:
+ wr %g0, ASI_P, %asi ! LSU Group
+__bzero_noasi:
+#else
+ .globl bzero
+bzero_private:
+bzero:
+#ifndef REGS_64BIT
+ srl %o1, 0, %o1
+#endif
+ mov %o0, %g3
+#endif
+ cmp %o1, 7
+ bleu,pn %xcc, 17f
+ andcc %o0, 3, %o2
+ be,a,pt %xcc, 4f
+ andcc %o0, 4, %g0
+ cmp %o2, 3
+ be,pn %xcc, 2f
+ EXO1(STB %g0, [%o0 + 0x00] ASINORMAL)
+ cmp %o2, 2
+ be,pt %xcc, 2f
+ EX(STB %g0, [%o0 + 0x01] ASINORMAL, sub %o1, 1)
+ EX(STB %g0, [%o0 + 0x02] ASINORMAL, sub %o1, 2)
+2: sub %o2, 4, %o2
+ sub %o0, %o2, %o0
+ add %o1, %o2, %o1
+ andcc %o0, 4, %g0
+4: be,pt %xcc, 2f
+ cmp %o1, 128
+ EXO1(STW %g0, [%o0] ASINORMAL)
+ sub %o1, 4, %o1
+ add %o0, 4, %o0
+2: blu,pn %xcc, 9f
+ andcc %o0, 0x38, %o2
+ be,pn %icc, 6f
+ mov 64, %o5
+ andcc %o0, 8, %g0
+ be,pn %icc, 1f
+ sub %o5, %o2, %o5
+ EX(STX %g0, [%o0] ASINORMAL, sub %o1, 0)
+ add %o0, 8, %o0
+1: andcc %o5, 16, %g0
+ be,pn %icc, 1f
+ sub %o1, %o5, %o1
+ EX1(STX %g0, [%o0] ASINORMAL, add %g0, 0)
+ EX1(STX %g0, [%o0 + 8] ASINORMAL, sub %g0, 8)
+ add %o0, 16, %o0
+1: andcc %o5, 32, %g0
+ be,pn %icc, 7f
+ andncc %o1, 0x3f, %o3
+ EX(STX %g0, [%o0] ASINORMAL, add %o1, 32)
+ EX(STX %g0, [%o0 + 8] ASINORMAL, add %o1, 24)
+ EX(STX %g0, [%o0 + 16] ASINORMAL, add %o1, 16)
+ EX(STX %g0, [%o0 + 24] ASINORMAL, add %o1, 8)
+ add %o0, 32, %o0
+6: andncc %o1, 0x3f, %o3
+7: be,pn %xcc, 9f
+#ifdef __KERNEL__
+ rd %asi, %g7
+ wr %g0, FPRS_FEF, %fprs
+ wr %g7, ASI_BLK_XOR, %asi
+#else
+ wr %g0, ASI_BLK_P, %asi
+#endif
+ membar #StoreStore | #LoadStore
+ fzero %f0
+ andcc %o3, 0xc0, %o2
+ and %o1, 0x3f, %o1
+ fzero %f2
+ andn %o3, 0xff, %o3
+ faddd %f0, %f2, %f4
+ fmuld %f0, %f2, %f6
+ cmp %o2, 64
+ faddd %f0, %f2, %f8
+ fmuld %f0, %f2, %f10
+ faddd %f0, %f2, %f12
+ brz,pn %o2, 10f
+ fmuld %f0, %f2, %f14
+ be,pn %icc, 2f
+ EXC(STBLK %f0, [%o0 + 0x00] ASIBLK, add %o3, %o2, add %o2, %o1, %o2)
+ cmp %o2, 128
+ be,pn %icc, 2f
+ EXC(STBLK %f0, [%o0 + 0x40] ASIBLK, add %o3, %o2, add %o2, %o1, %o2; sub %o2, 64, %o2)
+ EXC(STBLK %f0, [%o0 + 0x80] ASIBLK, add %o3, %o2, add %o2, %o1, %o2; sub %o2, 128, %o2)
+2: brz,pn %o3, 12f
+ add %o0, %o2, %o0
+10: EX(STBLK %f0, [%o0 + 0x00] ASIBLK, add %o3, %o1)
+ EXC(STBLK %f0, [%o0 + 0x40] ASIBLK, add %o3, %o1, sub %o1, 64, %o1)
+ EXC(STBLK %f0, [%o0 + 0x80] ASIBLK, add %o3, %o1, sub %o1, 128, %o1)
+ EXC(STBLK %f0, [%o0 + 0xc0] ASIBLK, add %o3, %o1, sub %o1, 192, %o1)
+11: subcc %o3, 256, %o3
+ bne,pt %xcc, 10b
+ add %o0, 256, %o0
+12:
+#ifdef __KERNEL__
+ wr %g0, 0, %fprs
+ wr %g7, 0x0, %asi
+#endif
+ membar #Sync
+9: andcc %o1, 0xf8, %o2
+ be,pn %xcc, 13f
+ andcc %o1, 7, %o1
+14: rd %pc, %o4
+ srl %o2, 1, %o3
+ sub %o4, %o3, %o4
+ jmpl %o4 + (13f - 14b), %g0
+ add %o0, %o2, %o0
+12: ZERO_BLOCKS(%o0, 0xc8, %g0)
+ ZERO_BLOCKS(%o0, 0x88, %g0)
+ ZERO_BLOCKS(%o0, 0x48, %g0)
+ ZERO_BLOCKS(%o0, 0x08, %g0)
+ EXT(12b,13f,VISbzerofixup_zb)
+13: be,pn %xcc, 8f
+ andcc %o1, 4, %g0
+ be,pn %xcc, 1f
+ andcc %o1, 2, %g0
+ EX(STW %g0, [%o0] ASINORMAL, and %o1, 7)
+ add %o0, 4, %o0
+1: be,pn %xcc, 1f
+ andcc %o1, 1, %g0
+ EX(STH %g0, [%o0] ASINORMAL, and %o1, 3)
+ add %o0, 2, %o0
+1: bne,a,pn %xcc, 8f
+ EX(STB %g0, [%o0] ASINORMAL, add %g0, 1)
+8: retl
+ RETL
+17: be,pn %xcc, 13b
+ orcc %o1, 0, %g0
+ be,pn %xcc, 0f
+8: add %o0, 1, %o0
+ subcc %o1, 1, %o1
+ bne,pt %xcc, 8b
+ EX(STB %g0, [%o0 - 1] ASINORMAL, add %o1, 1)
+0: retl
+ RETL
+
+#ifdef __KERNEL__
+ .section .fixup
+ .align 4
+VISbzerofixup_reto1:
+ mov %o1, %o0
+VISbzerofixup_ret0:
+ retl
+ wr %g0, 0, %fprs
+VISbzerofixup_ret1:
+ and %o5, 0x30, %o5
+ add %o5, %o1, %o5
+ ba,pt %xcc, VISbzerofixup_ret0
+ add %o0, %o5, %o0
+VISbzerofixup_ret2:
+ and %o5, 0x20, %o5
+ add %o5, %o1, %o5
+ ba,pt %xcc, VISbzerofixup_ret0
+ add %o0, %o5, %o0
+VISbzerofixup_zb:
+ andcc %o1, 7, %o1
+ sll %g2, 3, %g2
+ add %o1, 256, %o1
+ ba,pt %xcc, VISbzerofixup_ret0
+ sub %o1, %g2, %o0
+#endif
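
For readers tracing the control flow of __bzero above, here is a rough C paraphrase of the clearing strategy; it is not part of the commit. The name bzero_sketch is hypothetical, memset() of 64-byte chunks and plain 64-bit stores stand in for the ASI_BLK_P block stores and STX stores, and none of the kernel's __ex_table fixup handling is reproduced.

/* Illustrative sketch only -- not part of VISbzero.S or the commit above. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * bzero_sketch (hypothetical): mirrors the head/middle/tail structure of
 * __bzero.  Byte stores align the destination, large aligned chunks are
 * cleared in the middle, and narrow stores finish the remainder.
 */
static void bzero_sketch(void *dst, size_t len)
{
	unsigned char *p = dst;

	/* Head: byte stores until the destination is 8-byte aligned. */
	while (len && ((uintptr_t)p & 7)) {
		*p++ = 0;
		len--;
	}

	/* Middle: 64-byte blocks first, then remaining 8-byte words. */
	while (len >= 64) {
		memset(p, 0, 64);		/* ~ STBLK %f0, [%o0] ASI_BLK_P */
		p += 64;
		len -= 64;
	}
	while (len >= 8) {
		*(uint64_t *)(void *)p = 0;	/* ~ STX %g0, [%o0] */
		p += 8;
		len -= 8;
	}

	/* Tail: whatever is left below 8 bytes. */
	while (len--)
		*p++ = 0;
}

int main(void)
{
	unsigned char *buf = malloc(200);

	if (!buf)
		return 1;
	memset(buf, 0xff, 200);
	bzero_sketch(buf + 3, 150);	/* misaligned start, odd length */
	printf("%u %u %u %u\n", buf[2], buf[3], buf[152], buf[153]);
	free(buf);
	return 0;
}

The real routine additionally saves and restores %fprs and %asi around the VIS block stores and records every store in __ex_table so that a fault unwinds to the VISbzerofixup_* labels at the bottom of the file.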