summary | refs | log | tree | commit | diff | stats
path: root/arch/sparc64/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc64/lib')
-rw-r--r-- arch/sparc64/lib/Makefile | 4
-rw-r--r-- arch/sparc64/lib/VISbzero.S | 14
-rw-r--r-- arch/sparc64/lib/VIScopy.S | 115
-rw-r--r-- arch/sparc64/lib/VIScsum.S | 15
-rw-r--r-- arch/sparc64/lib/VIScsumcopy.S | 75
-rw-r--r-- arch/sparc64/lib/VISmemset.S | 15
-rw-r--r-- arch/sparc64/lib/VISsave.S | 122
-rw-r--r-- arch/sparc64/lib/blockops.S | 94
-rw-r--r-- arch/sparc64/lib/memscan.S | 203
9 files changed, 415 insertions, 242 deletions
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index 9f8729ee5..a580f7ae4 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -1,4 +1,4 @@
-# $Id: Makefile,v 1.15 1997/08/19 03:11:50 davem Exp $
+# $Id: Makefile,v 1.16 1998/06/12 14:53:53 jj Exp $
# Makefile for Sparc library files..
#
@@ -6,7 +6,7 @@ CFLAGS := $(CFLAGS)
OBJS = PeeCeeI.o blockops.o locks.o strlen.o strncmp.o \
memscan.o strncpy_from_user.o strlen_user.o memcmp.o checksum.o \
- VIScopy.o VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o
+ VIScopy.o VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o VISsave.o
lib.a: $(OBJS)
$(AR) rcs lib.a $(OBJS)
diff --git a/arch/sparc64/lib/VISbzero.S b/arch/sparc64/lib/VISbzero.S
index ede87843b..3992da997 100644
--- a/arch/sparc64/lib/VISbzero.S
+++ b/arch/sparc64/lib/VISbzero.S
@@ -1,4 +1,4 @@
-/* $Id: VISbzero.S,v 1.8 1997/08/22 15:54:50 jj Exp $
+/* $Id: VISbzero.S,v 1.9 1998/06/12 14:53:50 jj Exp $
* VISbzero.S: High speed clear operations utilizing the UltraSparc
* Visual Instruction Set.
*
@@ -9,6 +9,8 @@
#include "VIS.h"
#ifdef __KERNEL__
+#include <asm/visasm.h>
+
#define EXN(x,y,a,b,z) \
98: x,y; \
.section .fixup; \
@@ -141,9 +143,9 @@ bzero:
6: andncc %o1, 0x3f, %o3
7: be,pn %xcc, 9f
#ifdef __KERNEL__
- rd %asi, %g7
- wr %g0, FPRS_FEF, %fprs
- wr %g7, ASI_BLK_XOR, %asi
+ rd %asi, %o4
+ wr %o4, ASI_BLK_XOR, %asi
+ VISEntryHalf
#else
wr %g0, ASI_BLK_P, %asi
#endif
@@ -178,8 +180,8 @@ bzero:
add %o0, 256, %o0
12:
#ifdef __KERNEL__
- wr %g0, 0, %fprs
- wr %g7, 0x0, %asi
+ VISExitHalf
+ wr %o4, 0x0, %asi
#else
#ifndef REGS_64BIT
wr %g0, FPRS_FEF, %fprs
diff --git a/arch/sparc64/lib/VIScopy.S b/arch/sparc64/lib/VIScopy.S
index 40b781e73..7f2f497cd 100644
--- a/arch/sparc64/lib/VIScopy.S
+++ b/arch/sparc64/lib/VIScopy.S
@@ -1,9 +1,9 @@
-/* $Id: VIScopy.S,v 1.14 1997/08/22 15:54:53 jj Exp $
+/* $Id: VIScopy.S,v 1.18 1998/06/12 14:53:55 jj Exp $
* VIScopy.S: High speed copy operations utilizing the UltraSparc
* Visual Instruction Set.
*
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
- * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1996, 1997, 1998 Jakub Jelinek (jj@ultra.linux.cz)
*/
#include "VIS.h"
@@ -24,12 +24,15 @@
*/
#ifdef __KERNEL__
+
+#include <asm/visasm.h>
+
#define FPU_CLEAN_RETL \
- wr %g0, 0, %fprs; \
+ VISExit \
retl; \
clr %o0;
#define FPU_RETL \
- wr %g0, 0, %fprs; \
+ VISExit \
retl; \
clr %o0;
#define NORMAL_RETL \
@@ -40,7 +43,7 @@
.section .fixup; \
.align 4; \
99: ba VIScopyfixup_ret; \
- a, b, %o0; \
+ a, b, %o1; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
@@ -52,7 +55,7 @@
.align 4; \
99: c, d, e; \
ba VIScopyfixup_ret; \
- a, b, %o0; \
+ a, b, %o1; \
.section __ex_table; \
.align 4; \
.word 98b, 99b; \
@@ -298,10 +301,6 @@
.globl __memcpy_entry
.type __memcpy_entry,@function
-
- .globl copy_page
- .type copy_page,@function
-
memcpy_private:
__memcpy:
memcpy: mov ASI_BLK_P, asi_src ! IEU0 Group
@@ -310,12 +309,6 @@ memcpy: mov ASI_BLK_P, asi_src ! IEU0 Group
retl
clr %o0
-copy_page: wr %g0, FPRS_FEF, %fprs ! FPU Group
- sethi %hi(8192), %o2 ! IEU0 Group
- mov ASI_BLK_P, asi_src ! IEU1
- b,pt %xcc, dest_is_64byte_aligned ! CTI
- mov ASI_BLK_P, asi_dest ! IEU0 Group
-
.align 32
.globl __copy_from_user
.type __copy_from_user,@function
@@ -355,7 +348,11 @@ __memcpy_384plus:
#endif
VIS_enter:
be,pt %xcc, dest_is_8byte_aligned ! CTI
+#ifdef __KERNEL__
+ nop ! IEU0 Group
+#else
andcc %o0, 0x38, %g5 ! IEU1 Group
+#endif
do_dest_8byte_align:
mov 8, %g1 ! IEU0
sub %g1, %g2, %g2 ! IEU0 Group
@@ -377,7 +374,8 @@ do_dest_8byte_align:
EX(LDUB [%o1] ASINORMAL, %o5,
add %o2, %g2) ! Load Group
add %o0, 2, %o0 ! IEU0
- EX(LDUB [%o1 + 1] ASINORMAL, %g3,
+ EX2(LDUB [%o1 + 1] ASINORMAL, %g3,
+ sub %o0, 2, %o0,
add %o2, %g2) ! Load Group
ASI_SETDST_NOBLK ! LSU Group
subcc %g2, 2, %g2 ! IEU1 Group
@@ -389,17 +387,17 @@ do_dest_8byte_align:
EX2(STB %g3, [%o0 - 1] ASINORMAL,
add %g2, 1, %g2,
add %o2, %g2) ! Store
-3: andcc %o0, 0x38, %g5 ! IEU1 Group
-dest_is_8byte_aligned:
- be,pt %icc, dest_is_64byte_aligned ! CTI
#ifdef __KERNEL__
- wr %g0, FPRS_FEF, %fprs ! FPU Group
-do_dest_64byte_align:
- mov 64, %g1 ! IEU0 Group
+3:
+dest_is_8byte_aligned:
+ VISEntry
+ andcc %o0, 0x38, %g5 ! IEU1 Group
#else
- mov 64, %g1 ! IEU0 Group
-do_dest_64byte_align:
+3: andcc %o0, 0x38, %g5 ! IEU1 Group
+dest_is_8byte_aligned:
#endif
+ be,pt %icc, dest_is_64byte_aligned ! CTI
+ mov 64, %g1 ! IEU0
fmovd %f0, %f2 ! FPU
sub %g1, %g5, %g5 ! IEU0 Group
ASI_SETSRC_NOBLK ! LSU Group
@@ -646,7 +644,9 @@ __memcpy_short:
2: ASI_SETSRC_NOBLK ! LSU Group
EXO2(LDUB [%o1] ASINORMAL, %g5) ! LOAD Group
add %o0, 2, %o0 ! IEU0
- EXO2(LDUB [%o1 + 1] ASINORMAL, %o5) ! LOAD Group
+ EX2(LDUB [%o1 + 1] ASINORMAL, %o5,
+ sub %o0, 2, %o0,
+ add %o2, %g0) ! LOAD Group
add %o1, 2, %o1 ! IEU0
ASI_SETDST_NOBLK ! LSU Group
subcc %o2, 2, %o2 ! IEU1 Group
@@ -866,9 +866,9 @@ normal_retl:
ASI_SETSRC_NOBLK ! LSU Group
EX(LDX [%o1] ASINORMAL, %g2,
and %o2, 0xf) ! Load Group
- add %o1, 8, %o1 ! IEU0
+ add %o0, 8, %o0 ! IEU0
ASI_SETDST_NOBLK ! LSU Group
- add %o0, 8, %o0 ! IEU0 Group
+ add %o1, 8, %o1 ! IEU0 Group
EX(STX %g2, [%o0 - 0x8] ASINORMAL,
and %o2, 0xf) ! Store
85: be,pt %xcc, 1f ! CTI
@@ -876,9 +876,9 @@ normal_retl:
ASI_SETSRC_NOBLK ! LSU Group
EX(LDUW [%o1] ASINORMAL, %g2,
and %o2, 0x7) ! Load Group
- add %o1, 4, %o1 ! IEU0
+ add %o0, 4, %o0 ! IEU0
ASI_SETDST_NOBLK ! LSU Group
- add %o0, 4, %o0 ! IEU0 Group
+ add %o1, 4, %o1 ! IEU0 Group
EX(STW %g2, [%o0 - 0x4] ASINORMAL,
and %o2, 0x7) ! Store
1: be,pt %xcc, 1f ! CTI
@@ -886,9 +886,9 @@ normal_retl:
ASI_SETSRC_NOBLK ! LSU Group
EX(LDUH [%o1] ASINORMAL, %g2,
and %o2, 0x3) ! Load Group
- add %o1, 2, %o1 ! IEU0
+ add %o0, 2, %o0 ! IEU0
ASI_SETDST_NOBLK ! LSU Group
- add %o0, 2, %o0 ! IEU0 Group
+ add %o1, 2, %o1 ! IEU0 Group
EX(STH %g2, [%o0 - 0x2] ASINORMAL,
and %o2, 0x3) ! Store
1: be,pt %xcc, 1f ! CTI
@@ -920,7 +920,7 @@ memcpy_noVIS_misaligned:
add %o2, 1) ! Store
2:
#ifdef __KERNEL__
- wr %g0, FPRS_FEF, %fprs ! FPU Group
+ VISEntry
#endif
andn %o2, 7, %g5 ! IEU0 Group
and %o2, 7, %o2 ! IEU1
@@ -976,16 +976,31 @@ fpu_retl:
.section .fixup
.align 4
VIScopyfixup_reto2:
- mov %o2, %o0
+ mov %o2, %o1
VIScopyfixup_ret:
+ /* If this is copy_from_user(), zero out the rest of the
+ * kernel buffer.
+ */
+ andcc asi_src, 0x1, %g0
+ be,pt %icc, 1f
+ andcc asi_dest, 0x1, %g0
+ bne,pn %icc, 1f
+ VISExit
+ save %sp, -160, %sp
+ mov %i0, %o0
+ call __bzero
+ mov %i1, %o1
+ restore
+1: mov %o1, %o0
retl
- wr %g0, 0, %fprs
+ nop
VIScopyfixup1: subcc %g2, 18, %g2
+ add %o0, 32, %o0
bgeu,a,pt %icc, VIScopyfixup1
sub %g7, 32, %g7
+ sub %o0, 32, %o0
rd %pc, %g5
- add %g2, 18, %g2
- add %g2, 20, %g2
+ add %g2, (18 + 16), %g2
ldub [%g5 + %g2], %g2
ba,a,pt %xcc, 2f
.byte 0, 0, 0, 0, 0, 0, 0, 4, 4, 8, 12, 12, 16, 20, 20, 24, 28, 28
@@ -994,41 +1009,43 @@ VIScopyfixup2: mov (7 * 16), %g7
1: subcc %g2, 10, %g2
bgeu,a,pt %icc, 1b
sub %g7, 16, %g7
+ sub %o0, %g7, %o0
rd %pc, %g5
- add %g2, 10, %g2
- add %g2, 20, %g2
+ add %g2, (10 + 16), %g2
ldub [%g5 + %g2], %g2
ba,a,pt %xcc, 4f
.byte 0, 0, 0, 0, 0, 4, 4, 8, 12, 12
.align 4
VIScopyfixup3: subcc %g2, 10, %g2
+ add %o0, 32, %o0
bgeu,a,pt %icc, VIScopyfixup3
sub %g7, 32, %g7
+ sub %o0, 32, %o0
rd %pc, %g5
- add %g2, 10, %g2
- add %g2, 20, %g2
+ add %g2, (10 + 16), %g2
ldub [%g5 + %g2], %g2
ba,a,pt %xcc, 2f
.byte 0, 0, 0, 0, 0, 0, 0, 8, 16, 24
.align 4
-2: and %g1, 0x7f, %g1
+2: and %o2, 0x7f, %o2
sub %g7, %g2, %g7
ba,pt %xcc, VIScopyfixup_ret
- add %g7, %g1, %o0
+ add %g7, %o2, %o1
VIScopyfixup4: mov (7 * 16), %g7
3: subcc %g2, 6, %g2
bgeu,a,pt %icc, 3b
sub %g7, 16, %g7
+ sub %o0, %g7, %o0
rd %pc, %g5
- add %g2, 6, %g2
- add %g2, 20, %g2
+ add %g2, (6 + 16), %g2
ldub [%g5 + %g2], %g2
ba,a,pt %xcc, 4f
.byte 0, 0, 0, 0, 0, 8
.align 4
-4: and %g1, 7, %g1
+4: and %o2, 0xf, %o2
+ sub %g7, %g2, %g7
ba,pt %xcc, VIScopyfixup_ret
- add %g7, %g1, %o0
+ add %g7, %o2, %o1
VIScopyfixup_vis3:
sub %o2, 0x80, %o2
VIScopyfixup_vis2:
@@ -1038,13 +1055,13 @@ VIScopyfixup_vis0:
VIScopyfixup_vis1:
add %g7, %g3, %g7
ba,pt %xcc, VIScopyfixup_ret
- add %o2, %g7, %o0
+ add %o2, %g7, %o1
VIScopyfixup_vis5:
add %g3, 8, %g3
VIScopyfixup_vis4:
add %g3, 8, %g3
ba,pt %xcc, VIScopyfixup_ret
- add %o2, %g3, %o0
+ add %o2, %g3, %o1
#endif
#ifdef __KERNEL__
diff --git a/arch/sparc64/lib/VIScsum.S b/arch/sparc64/lib/VIScsum.S
index 81b020c49..a370bdff3 100644
--- a/arch/sparc64/lib/VIScsum.S
+++ b/arch/sparc64/lib/VIScsum.S
@@ -1,4 +1,4 @@
-/* $Id: VIScsum.S,v 1.2 1997/08/08 08:34:05 jj Exp $
+/* $Id: VIScsum.S,v 1.3 1998/06/12 14:53:57 jj Exp $
* VIScsum.S: High bandwidth IP checksumming utilizing the UltraSparc
* Visual Instruction Set.
*
@@ -26,6 +26,7 @@
#ifdef __KERNEL__
#include <asm/head.h>
#include <asm/asi.h>
+#include <asm/visasm.h>
#else
#define ASI_BLK_P 0xf0
#define FRPS_FEF 0x04
@@ -278,13 +279,13 @@ csum_partial:
add %o2, 1, %o2 /* IEU0 */
3: cmp %o1, 0xc0 /* IEU1 Group */
blu,pn %icc, 20f /* CTI */
- sllx %o2, 32, %g1 /* IEU0 */
- addcc %o2, %g1, %o2 /* IEU1 Group */
- sub %o1, 0xc0, %o1 /* IEU0 */
- wr %g0, ASI_BLK_P, %asi /* LSU Group */
+ sllx %o2, 32, %g5 /* IEU0 */
#ifdef __KERNEL__
- wr %g0, FPRS_FEF, %fprs /* LSU Group */
+ VISEntry
#endif
+ addcc %o2, %g5, %o2 /* IEU1 Group */
+ sub %o1, 0xc0, %o1 /* IEU0 */
+ wr %g0, ASI_BLK_P, %asi /* LSU Group */
membar #StoreLoad /* LSU Group */
srlx %o2, 32, %o2 /* IEU0 Group */
bcs,a,pn %xcc, 1f /* CTI */
@@ -340,7 +341,7 @@ csum_partial:
END_THE_TRICK(f60,f62,f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30)
and %o1, 0x3f, %o1 /* IEU0 Group */
#ifdef __KERNEL__
- wr %g0, 0, %fprs /* LSU Group */
+ VISExit
#endif
20: andcc %o1, 0xf0, %g1 /* IEU1 Group */
be,pn %icc, 23f /* CTI */
diff --git a/arch/sparc64/lib/VIScsumcopy.S b/arch/sparc64/lib/VIScsumcopy.S
index fff41bab2..469b007fc 100644
--- a/arch/sparc64/lib/VIScsumcopy.S
+++ b/arch/sparc64/lib/VIScsumcopy.S
@@ -1,4 +1,4 @@
-/* $Id: VIScsumcopy.S,v 1.4 1998/04/01 08:29:52 davem Exp $
+/* $Id: VIScsumcopy.S,v 1.5 1998/06/12 14:53:48 jj Exp $
* VIScsumcopy.S: High bandwidth IP checksumming with simultaneous
* copying utilizing the UltraSparc Visual Instruction Set.
*
@@ -27,6 +27,7 @@
#include <asm/head.h>
#include <asm/asi.h>
#include <asm/page.h>
+#include <asm/visasm.h>
#else
#define ASI_P 0x80
#define ASI_BLK_P 0xf0
@@ -42,11 +43,11 @@
#define sum o3
#define x1 g1
#define x2 g2
-#define x3 g3
+#define x3 o4
#define x4 g4
#define x5 g5
#define x6 g7
-#define x7 o4
+#define x7 g3
#define x8 o5
/* Dobrou noc, SunSoft engineers. Spete sladce.
@@ -248,7 +249,7 @@
csum_partial_copy_vis:
andcc %dst, 7, %g0 /* IEU1 Group */
be,pt %icc, 4f /* CTI */
- and %dst, 0x38, %g3 /* IEU0 */
+ and %dst, 0x38, %o4 /* IEU0 */
mov 1, %g5 /* IEU0 Group */
andcc %dst, 2, %g0 /* IEU1 */
be,pt %icc, 1f /* CTI */
@@ -266,18 +267,18 @@ csum_partial_copy_vis:
add %sum, %g5, %sum /* IEU0 */
1: lduwa [%src] %asi, %g2 /* Load */
brz,a,pn %g7, 4f /* CTI+IEU1 Group */
- and %dst, 0x38, %g3 /* IEU0 */
+ and %dst, 0x38, %o4 /* IEU0 */
add %dst, 4, %dst /* IEU0 Group */
sub %len, 4, %len /* IEU1 */
addcc %g2, %sum, %sum /* IEU1 Group */
bcs,a,pn %icc, 1f /* CTI */
add %sum, 1, %sum /* IEU0 */
-1: and %dst, 0x38, %g3 /* IEU0 Group */
+1: and %dst, 0x38, %o4 /* IEU0 Group */
stw %g2, [%dst - 4] /* Store */
add %src, 4, %src /* IEU1 */
4:
#ifdef __KERNEL__
- wr %g0, FPRS_FEF, %fprs /* LSU Group */
+ VISEntry
#endif
mov %src, %g7 /* IEU1 Group */
fzero %f48 /* FPA */
@@ -291,10 +292,10 @@ csum_partial_copy_vis:
sub %sum, 1, %sum /* IEU0 */
1: srl %sum, 0, %sum /* IEU0 Group */
clr %g5 /* IEU1 */
- brz,pn %g3, 3f /* CTI+IEU1 Group */
- sub %g1, %g3, %g1 /* IEU0 */
+ brz,pn %o4, 3f /* CTI+IEU1 Group */
+ sub %g1, %o4, %g1 /* IEU0 */
ldda [%src] %asi, %f0 /* Load */
- clr %g3 /* IEU0 Group */
+ clr %o4 /* IEU0 Group */
andcc %dst, 8, %g0 /* IEU1 */
be,pn %icc, 1f /* CTI */
ldda [%src + 8] %asi, %f2 /* Load Group */
@@ -303,7 +304,7 @@ csum_partial_copy_vis:
fpadd32 %f0, %f48, %f50 /* FPA */
addcc %dst, 8, %dst /* IEU1 Group */
faligndata %f0, %f2, %f16 /* FPA */
- fcmpgt32 %f48, %f50, %g3 /* FPM Group */
+ fcmpgt32 %f48, %f50, %o4 /* FPM Group */
fmovd %f2, %f0 /* FPA Group */
ldda [%src + 8] %asi, %f2 /* Load */
std %f16, [%dst - 8] /* Store */
@@ -318,13 +319,13 @@ csum_partial_copy_vis:
faligndata %f0, %f2, %f16 /* FPA */
fcmpgt32 %f48, %f50, %g5 /* FPM Group */
sub %len, 16, %len /* IEU0 */
- inc %g3 /* IEU1 */
+ inc %o4 /* IEU1 */
std %f16, [%dst - 16] /* Store Group */
fpadd32 %f2, %f50, %f48 /* FPA */
- srl %g3, 1, %o5 /* IEU0 */
+ srl %o4, 1, %o5 /* IEU0 */
faligndata %f2, %f4, %f18 /* FPA Group */
std %f18, [%dst - 8] /* Store */
- fcmpgt32 %f50, %f48, %g3 /* FPM Group */
+ fcmpgt32 %f50, %f48, %o4 /* FPM Group */
add %o5, %sum, %sum /* IEU0 */
ldda [%src + 8] %asi, %f2 /* Load */
fmovd %f4, %f0 /* FPA */
@@ -337,18 +338,18 @@ csum_partial_copy_vis:
add %dst, 32, %dst /* IEU1 */
faligndata %f0, %f2, %f16 /* FPA */
fcmpgt32 %f48, %f50, %o5 /* FPM Group */
- inc %g3 /* IEU0 */
+ inc %o4 /* IEU0 */
ldda [%src + 24] %asi, %f6 /* Load */
- srl %g3, 1, %g3 /* IEU0 Group */
+ srl %o4, 1, %o4 /* IEU0 Group */
add %g5, %sum, %sum /* IEU1 */
ldda [%src + 32] %asi, %f8 /* Load */
fpadd32 %f2, %f50, %f48 /* FPA */
faligndata %f2, %f4, %f18 /* FPA Group */
sub %len, 32, %len /* IEU0 */
std %f16, [%dst - 32] /* Store */
- fcmpgt32 %f50, %f48, %o4 /* FPM Group */
+ fcmpgt32 %f50, %f48, %g3 /* FPM Group */
inc %o5 /* IEU0 */
- add %g3, %sum, %sum /* IEU1 */
+ add %o4, %sum, %sum /* IEU1 */
fpadd32 %f4, %f48, %f50 /* FPA */
faligndata %f4, %f6, %f20 /* FPA Group */
srl %o5, 1, %o5 /* IEU0 */
@@ -356,14 +357,14 @@ csum_partial_copy_vis:
add %o5, %sum, %sum /* IEU0 */
std %f18, [%dst - 24] /* Store */
fpadd32 %f6, %f50, %f48 /* FPA */
- inc %o4 /* IEU0 Group */
+ inc %g3 /* IEU0 Group */
std %f20, [%dst - 16] /* Store */
add %src, 32, %src /* IEU1 */
faligndata %f6, %f8, %f22 /* FPA */
- fcmpgt32 %f50, %f48, %g3 /* FPM Group */
- srl %o4, 1, %o4 /* IEU0 */
+ fcmpgt32 %f50, %f48, %o4 /* FPM Group */
+ srl %g3, 1, %g3 /* IEU0 */
std %f22, [%dst - 8] /* Store */
- add %o4, %sum, %sum /* IEU0 Group */
+ add %g3, %sum, %sum /* IEU0 Group */
3: rd %asi, %g2 /* LSU Group + 4 bubbles */
#ifdef __KERNEL__
4: sethi %hi(vis0s), %g7 /* IEU0 Group */
@@ -371,16 +372,16 @@ csum_partial_copy_vis:
4: rd %pc, %g7 /* LSU Group + 4 bubbles */
#endif
inc %g5 /* IEU0 Group */
- and %src, 0x38, %o4 /* IEU1 */
+ and %src, 0x38, %g3 /* IEU1 */
membar #StoreLoad /* LSU Group */
srl %g5, 1, %g5 /* IEU0 */
- inc %g3 /* IEU1 */
- sll %o4, 8, %o4 /* IEU0 Group */
+ inc %o4 /* IEU1 */
+ sll %g3, 8, %g3 /* IEU0 Group */
sub %len, 0xc0, %len /* IEU1 */
addcc %g5, %sum, %sum /* IEU1 Group */
- srl %g3, 1, %g3 /* IEU0 */
- add %g7, %o4, %g7 /* IEU0 Group */
- add %g3, %sum, %sum /* IEU1 */
+ srl %o4, 1, %o4 /* IEU0 */
+ add %g7, %g3, %g7 /* IEU0 Group */
+ add %o4, %sum, %sum /* IEU1 */
#ifdef __KERNEL__
jmpl %g7 + %lo(vis0s), %g0 /* CTI+IEU1 Group */
#else
@@ -815,7 +816,7 @@ ett: rd %gsr, %x3 /* LSU Group+4bubbles */
END_THE_TRICK2( f48,f50,f52,f54,f56,f58,f60,f10,f12,f62)
membar #Sync /* LSU Group */
#ifdef __KERNEL__
- wr %g0, 0, %fprs /* LSU Group */
+ VISExit
add %sp, 8, %sp /* IEU0 Group */
#endif
23: brnz,pn %len, 26f /* CTI+IEU1 Group */
@@ -834,12 +835,12 @@ ett: rd %gsr, %x3 /* LSU Group+4bubbles */
#endif
26: andcc %len, 8, %g0 /* IEU1 Group */
be,pn %icc, 1f /* CTI */
- lduwa [%src] %asi, %g3 /* Load */
+ lduwa [%src] %asi, %o4 /* Load */
lduwa [%src+4] %asi, %g2 /* Load Group */
add %src, 8, %src /* IEU0 */
add %dst, 8, %dst /* IEU1 */
- sllx %g3, 32, %g5 /* IEU0 Group */
- stw %g3, [%dst - 8] /* Store */
+ sllx %o4, 32, %g5 /* IEU0 Group */
+ stw %o4, [%dst - 8] /* Store */
or %g5, %g2, %g5 /* IEU0 Group */
stw %g2, [%dst - 4] /* Store */
addcc %g5, %sum, %sum /* IEU1 Group */
@@ -855,11 +856,11 @@ ett: rd %gsr, %x3 /* LSU Group+4bubbles */
stw %g7, [%dst - 4] /* Store */
1: andcc %len, 2, %g0 /* IEU1 */
be,a,pn %icc, 1f /* CTI */
- clr %o4 /* IEU0 Group */
+ clr %g3 /* IEU0 Group */
lduha [%src] %asi, %g7 /* Load */
add %src, 2, %src /* IEU1 */
add %dst, 2, %dst /* IEU0 Group */
- sll %g7, 16, %o4 /* IEU0 Group */
+ sll %g7, 16, %g3 /* IEU0 Group */
sth %g7, [%dst - 2] /* Store */
1: andcc %len, 1, %g0 /* IEU1 */
be,a,pn %icc, 1f /* CTI */
@@ -867,9 +868,9 @@ ett: rd %gsr, %x3 /* LSU Group+4bubbles */
lduba [%src] %asi, %g7 /* Load */
sll %g7, 8, %o5 /* IEU0 Group */
stb %g7, [%dst] /* Store */
-1: or %g2, %o4, %o4 /* IEU1 */
- or %o5, %o4, %o4 /* IEU0 Group (regdep) */
- addcc %o4, %sum, %sum /* IEU1 Group (regdep) */
+1: or %g2, %g3, %g3 /* IEU1 */
+ or %o5, %g3, %g3 /* IEU0 Group (regdep) */
+ addcc %g3, %sum, %sum /* IEU1 Group (regdep) */
bcs,a,pn %xcc, 1f /* CTI */
add %sum, 1, %sum /* IEU0 */
1: ba,pt %xcc, 25b /* CTI Group */
diff --git a/arch/sparc64/lib/VISmemset.S b/arch/sparc64/lib/VISmemset.S
index 4c24931ba..9be111134 100644
--- a/arch/sparc64/lib/VISmemset.S
+++ b/arch/sparc64/lib/VISmemset.S
@@ -1,4 +1,4 @@
-/* $Id: VISmemset.S,v 1.7 1997/08/22 15:54:56 jj Exp $
+/* $Id: VISmemset.S,v 1.8 1998/06/12 14:53:59 jj Exp $
* VISmemset.S: High speed memset operations utilizing the UltraSparc
* Visual Instruction Set.
*
@@ -32,6 +32,9 @@
#endif
#ifdef __KERNEL__
+
+#include <asm/visasm.h>
+
#define RETL clr %o0
#else
#define RETL mov %g3, %o0
@@ -135,8 +138,9 @@ memset:
#endif
add %o0, 32, %o0
7: be,pn %xcc, 9f
+ nop
#ifdef __KERNEL__
- wr %g0, FPRS_FEF, %fprs
+ VISEntryHalf
#endif
ldd [%o0 - 8], %f0
18: wr %g0, ASI_BLK_P, %asi
@@ -170,7 +174,7 @@ memset:
add %o0, 256, %o0
12:
#ifdef __KERNEL__
- wr %g0, 0, %fprs
+ VISExitHalf
#else
#ifndef REGS_64BIT
wr %g0, FPRS_FEF, %fprs
@@ -231,10 +235,9 @@ memset:
#endif
andncc %o2, 0x3f, %o3
be,pn %xcc, 9b
-#ifdef __KERNEL__
- wr %g0, FPRS_FEF, %fprs
-#else
nop
+#ifdef __KERNEL__
+ VISEntryHalf
#endif
ba,pt %xcc, 18b
ldd [%o0], %f0
diff --git a/arch/sparc64/lib/VISsave.S b/arch/sparc64/lib/VISsave.S
new file mode 100644
index 000000000..10d127bb5
--- /dev/null
+++ b/arch/sparc64/lib/VISsave.S
@@ -0,0 +1,122 @@
+/* $Id: VISsave.S,v 1.2 1998/06/19 12:14:25 jj Exp $
+ * VISsave.S: Code for saving FPU register state for
+ * VIS routines. One should not call this directly,
+ * but use macros provided in <asm/visasm.h>.
+ *
+ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+#include <asm/asi.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/visasm.h>
+
+ .text
+ .globl VISenter, VISenterhalf
+
+ /* On entry: %o5=current FPRS value, %g7 is callers address */
+ /* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */
+
+ .align 32
+VISenter: /* Record FPU state in the %g6-relative save area according to fpdepth and the FPRS bits in %o5, then jump back to %g7. NOTE(review): %g6 is presumably the current task pointer - confirm */
+ ldub [%g6 + AOFF_task_tss + AOFF_thread_fpdepth], %g1 /* %g1 = fpdepth byte */
+ brnz,a,pn %g1, 1f /* fpdepth != 0: handle at 1: (annulled slot runs only when taken) */
+ cmp %g1, 1 /* delay slot: set cc for the fpdepth == 1 test at 1: */
+ stb %g0, [%g6 + AOFF_task_tss + AOFF_thread_fpsaved] /* fpdepth == 0: clear fpsaved */
+ stx %fsr, [%g6 + AOFF_task_tss + AOFF_thread_xfsr] /* and record %fsr */
+9: jmpl %g7 + %g0, %g0 /* return to the address passed in %g7; no FP registers stored on this path */
+ nop
+1: bne,pn %icc, 2f /* fpdepth != 1: per-level handling at 2: */
+
+ srl %g1, 1, %g1 /* delay slot (always executed): %g1 = fpdepth >> 1 */
+vis1: ldub [%g6 + AOFF_task_tss + AOFF_thread_fpsaved], %g3 /* fpdepth == 1 path; VISenterhalf also branches here */
+ stx %fsr, [%g6 + AOFF_task_tss + AOFF_thread_xfsr]
+ or %g3, %o5, %g3 /* merge the FPRS bits from %o5 into fpsaved */
+ stb %g3, [%g6 + AOFF_task_tss + AOFF_thread_fpsaved]
+ rd %gsr, %g3 /* %g3 = %gsr, stored in the ba delay slot below */
+ clr %g1 /* save-area offset 0 for the block stores at 3: */
+ ba,pt %xcc, 3f
+
+ stb %g3, [%g6 + AOFF_task_tss + AOFF_thread_gsr] /* delay slot: store low byte of %gsr */
+2: add %g6, %g1, %g3 /* %g3 = %g6 + (fpdepth >> 1): base for this level's fpsaved/gsr bytes */
+ cmp %o5, FPRS_DU /* is FPRS exactly DU (no other bit set)? */
+ be,pn %icc, 6f /* yes: only the upper half is stored, at 6: */
+ sll %g1, 3, %g1 /* delay slot (always executed): %g1 = index * 8, the xfsr slot offset */
+ stb %o5, [%g3 + AOFF_task_tss + AOFF_thread_fpsaved] /* fpsaved[index] = FPRS */
+ rd %gsr, %g2
+ stb %g2, [%g3 + AOFF_task_tss + AOFF_thread_gsr] /* gsr[index] = low byte of %gsr */
+
+ add %g6, %g1, %g2
+ stx %fsr, [%g2 + AOFF_task_tss + AOFF_thread_xfsr] /* xfsr[index] = %fsr */
+ sll %g1, 5, %g1 /* %g1 = index * 256: offset into the fpregs save area */
+3: andcc %o5, FPRS_DL|FPRS_DU, %g0 /* any dirty bit set? */
+ be,pn %icc, 9b /* none: return via 9: without storing registers */
+ add %g6, AOFF_task_fpregs, %g2 /* delay slot: %g2 = fpregs save base */
+ andcc %o5, FPRS_DL, %g0 /* lower half dirty? (tested by the be after the membar) */
+ membar #StoreStore | #LoadStore
+
+ be,pn %icc, 4f /* DL clear: skip the lower-half stores */
+ add %g6, AOFF_task_fpregs+0x40, %g3 /* delay slot: %g3 = fpregs save base + 0x40 */
+ stda %f0, [%g2 + %g1] ASI_BLK_P /* block store starting at %f0 */
+ stda %f16, [%g3 + %g1] ASI_BLK_P /* block store starting at %f16, 0x40 bytes further on */
+ andcc %o5, FPRS_DU, %g0 /* upper half dirty? */
+ be,pn %icc, 5f /* no: finish at 5: (the slot below still executes, harmlessly) */
+4: add %g1, 128, %g1 /* branch target and delay slot: advance offset by 128 to the upper-half area */
+ stda %f32, [%g2 + %g1] ASI_BLK_P /* block store starting at %f32 */
+
+ stda %f48, [%g3 + %g1] ASI_BLK_P /* block store starting at %f48 */
+5: membar #Sync /* barrier after the block stores before returning */
+ jmpl %g7 + %g0, %g0
+ nop
+
+6: ldub [%g3 + AOFF_task_tss + AOFF_thread_fpsaved], %o5 /* DU-only case: OR DU into the existing fpsaved[index] */
+ or %o5, FPRS_DU, %o5
+ add %g6, AOFF_task_fpregs+0x80, %g2 /* %g2 = fpregs save base + 0x80 (upper-half area) */
+ stb %o5, [%g3 + AOFF_task_tss + AOFF_thread_fpsaved]
+
+ sll %g1, 5, %g1 /* %g1 = index * 256 (index * 8 was computed in the delay slot at 2:) */
+ add %g6, AOFF_task_fpregs+0xc0, %g3 /* %g3 = fpregs save base + 0xc0 */
+ membar #StoreStore | #LoadStore
+ stda %f32, [%g2 + %g1] ASI_BLK_P /* store only the upper half: blocks at %f32 and %f48 */
+ stda %f48, [%g3 + %g1] ASI_BLK_P
+ membar #Sync
+ jmpl %g7 + %g0, %g0 /* return; gsr and xfsr are not written on this path */
+ nop
+
+ .align 32
+VISenterhalf: /* Variant of VISenter that stores at most the lower FP half (%f0 and %f16 blocks) and writes %fprs itself on return; same entry registers as VISenter */
+ ldub [%g6 + AOFF_task_tss + AOFF_thread_fpdepth], %g1 /* %g1 = fpdepth byte */
+ brnz,a,pn %g1, 1f /* fpdepth != 0: handle at 1: (annulled slot runs only when taken) */
+ cmp %g1, 1 /* delay slot: set cc for the fpdepth == 1 test at 1: */
+ stb %g0, [%g6 + AOFF_task_tss + AOFF_thread_fpsaved] /* fpdepth == 0: clear fpsaved */
+ stx %fsr, [%g6 + AOFF_task_tss + AOFF_thread_xfsr] /* and record %fsr */
+ clr %o5
+ jmpl %g7 + %g0, %g0 /* return to the address passed in %g7 */
+ wr %g0, FPRS_FEF, %fprs /* delay slot: %fprs = FPRS_FEF only */
+
+1: bne,pn %icc, 2f /* fpdepth != 1: per-level handling at 2: */
+ srl %g1, 1, %g1 /* delay slot (always executed): %g1 = fpdepth >> 1 */
+ ba,pt %xcc, vis1 /* fpdepth == 1: reuse the full-save path inside VISenter */
+ sub %g7, 8, %g7 /* delay slot: return 8 bytes (two instructions) earlier. NOTE(review): presumably matches the VISEntryHalf macro layout in <asm/visasm.h> - confirm */
+2: addcc %g6, %g1, %g3 /* %g3 = %g6 + (fpdepth >> 1); the cc written here are overwritten at 3: before any branch reads them */
+ sll %g1, 3, %g1 /* %g1 = index * 8, the xfsr slot offset */
+ andn %o5, FPRS_DU, %g2 /* FPRS with the DU bit masked off */
+ stb %g2, [%g3 + AOFF_task_tss + AOFF_thread_fpsaved] /* fpsaved[index] = FPRS & ~DU */
+
+ rd %gsr, %g2
+ stb %g2, [%g3 + AOFF_task_tss + AOFF_thread_gsr] /* gsr[index] = low byte of %gsr */
+ add %g6, %g1, %g2
+ stx %fsr, [%g2 + AOFF_task_tss + AOFF_thread_xfsr] /* xfsr[index] = %fsr */
+ sll %g1, 5, %g1 /* %g1 = index * 256: offset into the fpregs save area */
+3: andcc %o5, FPRS_DL, %g0 /* lower half dirty? */
+ be,pn %icc, 4f /* no: nothing to store */
+ add %g6, AOFF_task_fpregs, %g2 /* delay slot: %g2 = fpregs save base */
+
+ membar #StoreStore | #LoadStore
+ add %g6, AOFF_task_fpregs+0x40, %g3 /* %g3 = fpregs save base + 0x40 */
+ stda %f0, [%g2 + %g1] ASI_BLK_P /* block store starting at %f0 */
+ stda %f16, [%g3 + %g1] ASI_BLK_P /* block store starting at %f16 */
+ membar #Sync
+4: and %o5, FPRS_DU, %o5 /* keep only the DU bit of the incoming FPRS value */
+ jmpl %g7 + %g0, %g0
+ wr %o5, FPRS_FEF, %fprs /* delay slot: wr XORs its operands, so %fprs = FPRS_FEF plus the preserved DU bit */
diff --git a/arch/sparc64/lib/blockops.S b/arch/sparc64/lib/blockops.S
index 7d5b240ad..c57f0aefc 100644
--- a/arch/sparc64/lib/blockops.S
+++ b/arch/sparc64/lib/blockops.S
@@ -1,52 +1,66 @@
-/* $Id: blockops.S,v 1.11 1997/07/29 09:35:36 davem Exp $
- * arch/sparc64/lib/blockops.S: UltraSparc block zero optimized routines.
+/* $Id: blockops.S,v 1.14 1998/06/12 14:53:46 jj Exp $
+ * blockops.S: UltraSparc block zero optimized routines.
*
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996,1998 David S. Miller (davem@caip.rutgers.edu)
* Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
*/
#include "VIS.h"
+#include <asm/visasm.h>
+
+#define TOUCH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7) \
+ fmovd %reg0, %f48; fmovd %reg1, %f50; \
+ fmovd %reg2, %f52; fmovd %reg3, %f54; \
+ fmovd %reg4, %f56; fmovd %reg5, %f58; \
+ fmovd %reg6, %f60; fmovd %reg7, %f62;
.text
.align 32
-
- .globl __bfill64
-__bfill64: /* %o0 = buf, %o1= ptr to pattern */
- wr %g0, FPRS_FEF, %fprs ! FPU Group
- ldd [%o1], %f48 ! Load Group
- wr %g0, ASI_BLK_P, %asi ! LSU Group
- membar #StoreLoad | #StoreStore | #LoadStore ! LSU Group
- mov 32, %g2 ! IEU0 Group
-
- /* Cannot perform real arithmatic on the pattern, that can
- * lead to fp_exception_other ;-)
- */
- fmovd %f48, %f50 ! FPA Group
- fmovd %f48, %f52 ! FPA Group
- fmovd %f48, %f54 ! FPA Group
- fmovd %f48, %f56 ! FPA Group
- fmovd %f48, %f58 ! FPA Group
- fmovd %f48, %f60 ! FPA Group
- fmovd %f48, %f62 ! FPA Group
-
-1: stda %f48, [%o0 + 0x00] %asi ! Store Group
- stda %f48, [%o0 + 0x40] %asi ! Store Group
- stda %f48, [%o0 + 0x80] %asi ! Store Group
- stda %f48, [%o0 + 0xc0] %asi ! Store Group
- subcc %g2, 1, %g2 ! IEU1 Group
- bne,pt %icc, 1b ! CTI
- add %o0, 0x100, %o0 ! IEU0
- membar #StoreLoad | #StoreStore ! LSU Group
-
- jmpl %o7 + 0x8, %g0 ! CTI Group brk forced
- wr %g0, 0, %fprs ! FPU Group
+ .globl copy_page
+ .type copy_page,@function
+copy_page: /* %o0=dest, %o1=src; copies 8192 bytes as 128 block transfers of 0x40 bytes each */
+ VISEntry /* macro from <asm/visasm.h> */
+ membar #LoadStore | #StoreStore | #StoreLoad
+ ldda [%o1] ASI_BLK_P, %f0 /* preload source block 0 into the %f0 block */
+ add %o1, 0x40, %o1
+ ldda [%o1] ASI_BLK_P, %f16 /* preload source block 1 into the %f16 block */
+ add %o1, 0x40, %o1
+ sethi %hi(8192), %o2 /* %o2 = 8192, byte countdown */
+1: TOUCH(f0, f2, f4, f6, f8, f10, f12, f14) /* fmovd the %f0 block into the %f48-%f62 staging registers */
+ ldda [%o1] ASI_BLK_P, %f32 /* fetch the next source block into the %f32 block */
+ add %o1, 0x40, %o1
+ sub %o2, 0x40, %o2
+ stda %f48, [%o0] ASI_BLK_P /* store the staged block to dest */
+ add %o0, 0x40, %o0
+ TOUCH(f16, f18, f20, f22, f24, f26, f28, f30) /* stage the %f16 block */
+ ldda [%o1] ASI_BLK_P, %f0 /* refill the %f0 block */
+ add %o1, 0x40, %o1
+ sub %o2, 0x40, %o2
+ stda %f48, [%o0] ASI_BLK_P
+ add %o0, 0x40, %o0
+ TOUCH(f32, f34, f36, f38, f40, f42, f44, f46) /* stage the %f32 block */
+ ldda [%o1] ASI_BLK_P, %f16 /* refill the %f16 block */
+ add %o1, 0x40, %o1
+ sub %o2, 0x40, %o2
+ stda %f48, [%o0] ASI_BLK_P
+ cmp %o2, 0x80 /* stop when two blocks remain; they are already loaded in %f0 and %f16 */
+ bne,pt %xcc, 1b /* each pass moves 0xc0 bytes, so 8192 reaches 0x80 after 42 passes */
+ add %o0, 0x40, %o0 /* delay slot: advance dest past the block just stored */
+ membar #Sync
+ stda %f0, [%o0] ASI_BLK_P /* store the last two blocks directly, no staging */
+ add %o0, 0x40, %o0
+ stda %f16, [%o0] ASI_BLK_P
+ membar #StoreStore | #StoreLoad
+ jmpl %o7 + 0x8, %g0 /* return to caller */
+ VISExit /* delay slot. NOTE(review): relies on VISExit expanding to a single instruction - confirm in <asm/visasm.h> */
.align 32
.globl __bzero_1page
-__bzero_1page:
- wr %g0, FPRS_FEF, %fprs ! FPU Group
+ .type __bzero_1page,@function
+__bzero_1page: /* %o0=dest */
+ VISEntryHalf
fzero %f0 ! FPA Group
- mov 32, %g1 ! IEU0
+ mov 32, %o1 ! IEU0
fzero %f2 ! FPA Group
faddd %f0, %f2, %f4 ! FPA Group
fmuld %f0, %f2, %f6 ! FPM
@@ -62,9 +76,9 @@ __bzero_1page:
stda %f0, [%o0 + 0x80] %asi ! Store Group
stda %f0, [%o0 + 0xc0] %asi ! Store Group
- subcc %g1, 1, %g1 ! IEU1
+ subcc %o1, 1, %o1 ! IEU1
bne,pt %icc, 1b ! CTI
add %o0, 0x100, %o0 ! IEU0 Group
- membar #StoreLoad | #StoreStore ! LSU Group
+ membar #StoreStore | #StoreLoad ! LSU Group
jmpl %o7 + 0x8, %g0 ! CTI Group brk forced
- wr %g0, 0, %fprs ! FPU Group
+ VISExitHalf
diff --git a/arch/sparc64/lib/memscan.S b/arch/sparc64/lib/memscan.S
index 83abe4040..423bc1409 100644
--- a/arch/sparc64/lib/memscan.S
+++ b/arch/sparc64/lib/memscan.S
@@ -1,116 +1,129 @@
-/* $Id: memscan.S,v 1.1 1997/03/14 21:04:24 jj Exp $
- * memscan.S: Optimized memscan for the Sparc64.
+/* $Id: memscan.S,v 1.2 1998/05/21 14:42:22 jj Exp $
+ * memscan.S: Optimized memscan for Sparc64.
*
- * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 David S. Miller (davem@dm.cobaltmicro.com)
*/
-/* In essence, this is just a fancy strlen. */
-
-#define LO_MAGIC 0x01010101
-#define HI_MAGIC 0x80808080
+#define HI_MAGIC 0x8080808080808080
+#define LO_MAGIC 0x0101010101010101
+#define ASI_PL 0x88
.text
- .align 4
- .globl __memscan_zero, __memscan_generic
- .globl memscan
+ .align 32
+ .globl __memscan_zero, __memscan_generic
+ .globl memscan
+
__memscan_zero: /* Scan size bytes at bufp for a zero byte: returns its address, or the end of the buffer if none is found (bufp unchanged when size <= 0) */
- /* %o0 = addr, %o1 = size */
- brlez,pn %o1, 0f
- andcc %o0, 3, %g0
- be,pt %icc, 9f
- sethi %hi(HI_MAGIC), %o4
- ldub [%o0], %o5
- subcc %o1, 1, %o1
- brz,pn %o5, 10f
- add %o0, 1, %o0
- be,pn %xcc, 0f
- andcc %o0, 3, %g0
- be,pn %icc, 4f
- or %o4, %lo(HI_MAGIC), %o3
- ldub [%o0], %o5
- subcc %o1, 1, %o1
- brz,pn %o5, 10f
- add %o0, 1, %o0
- be,pn %xcc, 0f
- andcc %o0, 3, %g0
- be,pt %icc, 5f
- sethi %hi(LO_MAGIC), %o4
- ldub [%o0], %o5
- subcc %o1, 1, %o1
- brz,pn %o5, 10f
- add %o0, 1, %o0
- be,pn %xcc, 0f
- or %o4, %lo(LO_MAGIC), %o2
- ba,pt %xcc, 2f
- ld [%o0], %o5
-9:
- or %o4, %lo(HI_MAGIC), %o3
-4:
- sethi %hi(LO_MAGIC), %o4
-5:
- or %o4, %lo(LO_MAGIC), %o2
- ld [%o0], %o5
-2:
- sub %o5, %o2, %o4
- sub %o1, 4, %o1
- andcc %o4, %o3, %g0
- be,pn %icc, 1f
- add %o0, 4, %o0
- brgz,pt %o1, 2b
- ld [%o0], %o5
+ /* %o0 = bufp, %o1 = size */
+ brlez,pn %o1, szzero /* size <= 0: return bufp unchanged */
+ andcc %o0, 7, %g0 /* delay slot: is bufp 8-byte aligned? */
+ be,pt %icc, we_are_aligned
+ sethi %hi(HI_MAGIC), %o4 /* delay slot (always executed): start building the 0x80 pattern */
+ ldub [%o0], %o5 /* unaligned head: test one byte at a time */
+1: subcc %o1, 1, %o1 /* cc here feed the size-exhausted test two instructions below */
+ brz,pn %o5, 10f /* zero byte found: 10: backs %o0 up by one */
+ add %o0, 1, %o0 /* delay slot: advance pointer */
+ be,pn %xcc, szzero /* size exhausted: return %o0, now the end of the buffer */
+ andcc %o0, 7, %g0 /* delay slot: aligned yet? */
+ bne,a,pn %icc, 1b /* not aligned: next byte */
+ ldub [%o0], %o5
+we_are_aligned:
+ ldxa [%o0] ASI_PL, %o5 /* 8-byte load via ASI_PL (0x88, little-endian): lowest-addressed byte lands in bits 7..0 */
+ or %o4, %lo(HI_MAGIC), %o3 /* %o3 = low 32 bits of HI_MAGIC */
+ sllx %o3, 32, %o4
+ or %o4, %o3, %o3 /* %o3 = 0x8080808080808080 */
+
+ srlx %o3, 7, %o2 /* %o2 = 0x0101010101010101 */
+msloop:
+ sub %o1, 8, %o1 /* account for the 8 bytes in %o5 */
+ add %o0, 8, %o0 /* %o0 now points just past them */
+ sub %o5, %o2, %o4
+ xor %o4, %o5, %o4
+ andcc %o4, %o3, %g3 /* nonzero result marks this word as a candidate; the byte tests below confirm */
+ bne,pn %xcc, check_bytes
+ srlx %o4, 32, %g3 /* delay slot (always executed): %g3 = upper 32 bits of the xor result */
+
+ brgz,a,pt %o1, msloop /* bytes remain: load the next word and continue */
+ ldxa [%o0] ASI_PL, %o5
+check_bytes:
+ bne,a,pn %icc, 2f /* %icc still reflects the low 32 bits of the andcc: candidate in the first four bytes */
+ andcc %o5, 0xff, %g0 /* annulled delay slot: test byte 0 */
+ add %o0, -5, %g2 /* no candidate in the low half: %g2 = address of byte 3, incremented before each later test */
+ ba,pt %xcc, 3f
+ srlx %o5, 32, %g5 /* delay slot: %g5 = bytes 4..7 */
+
+2: srlx %o5, 8, %g5
+ be,pn %icc, 1f /* byte 0 is zero */
+ add %o0, -8, %g2 /* delay slot: %g2 = address of byte 0 */
+ andcc %g5, 0xff, %g0 /* byte 1 */
+ srlx %g5, 8, %g5
+ be,pn %icc, 1f
+ inc %g2 /* delay slot: %g2 tracks the address of the byte just tested */
+ andcc %g5, 0xff, %g0 /* byte 2 */
+
+ srlx %g5, 8, %g5
+ be,pn %icc, 1f
+ inc %g2
+ andcc %g5, 0xff, %g0 /* byte 3 */
+ srlx %g5, 8, %g5
+ be,pn %icc, 1f
+ inc %g2
+ andcc %g3, %o3, %g0 /* candidate in the upper half as well? */
+
+ be,a,pn %icc, 2f /* no: false alarm, resume the word loop */
+ mov %o0, %g2
+3: andcc %g5, 0xff, %g0 /* byte 4 */
+ srlx %g5, 8, %g5
+ be,pn %icc, 1f
+ inc %g2
+ andcc %g5, 0xff, %g0 /* byte 5 */
+ srlx %g5, 8, %g5
+
+ be,pn %icc, 1f
+ inc %g2
+ andcc %g5, 0xff, %g0 /* byte 6 */
+ srlx %g5, 8, %g5
+ be,pn %icc, 1f
+ inc %g2
+ andcc %g5, 0xff, %g0 /* byte 7 */
+ srlx %g5, 8, %g5
+
+ be,pn %icc, 1f
+ inc %g2
+2: brgz,a,pt %o1, msloop /* no zero byte in this word: continue while bytes remain */
+ ldxa [%o0] ASI_PL, %o5
+ inc %g2
+1: add %o0, %o1, %o0 /* %o0 = end of buffer (%o1 is the remaining count, possibly negative) */
+ cmp %g2, %o0 /* candidate address against the end of the buffer */
retl
- add %o0, %o1, %o0
-1:
- /* Check every byte. */
- srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
- be,pn %icc, 1f
- add %o0, -4, %o4
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- be,pn %icc, 1f
- add %o4, 1, %o4
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
- be,pn %icc, 1f
- add %o4, 1, %o4
- andcc %o5, 0xff, %g0
- be,pn %icc, 1f
- add %o4, 1, %o4
- brgz,pt %o1, 2b
- ld [%o0], %o5
-1:
- add %o0, %o1, %o0
- cmp %o4, %o0
- retl
- movle %xcc, %o4, %o0
-0:
- retl
+
+ movle %xcc, %g2, %o0 /* delay slot: return %g2 when it does not exceed the end, else the end */
+10: retl /* zero byte found in the unaligned head */
+ sub %o0, 1, %o0 /* delay slot: undo the post-increment so %o0 points at the zero byte */
+szzero: retl /* return %o0 as it stands */
nop
-10:
- retl
- sub %o0, 1, %o0
memscan: /* Find the first byte equal to c in size bytes at addr; returns its address, or addr + size if absent */
__memscan_generic:
/* %o0 = addr, %o1 = c, %o2 = size */
- brz,pn %o2, 3f
- add %o0, %o2, %o3
- ldub [%o0], %o5
- sub %g0, %o2, %o4
+ brz,pn %o2, 3f /* size == 0: return addr unchanged */
+ add %o0, %o2, %o3 /* delay slot: %o3 = end of buffer */
+ ldub [%o0], %o5 /* first byte */
+ sub %g0, %o2, %o4 /* %o4 = -size, a negative index counting up toward 0 */
1:
- cmp %o5, %o1
- be,pn %icc, 2f
- addcc %o4, 1, %o4
- bne,a,pt %xcc, 1b
- ldub [%o3 + %o4], %o5
+ cmp %o5, %o1 /* byte == c ? */
+ be,pn %icc, 2f /* match: finish at 2: */
+ addcc %o4, 1, %o4 /* delay slot (always executed): bump the index and set cc for the loop test */
+ bne,a,pt %xcc, 1b /* index != 0: more bytes to scan */
+ ldub [%o3 + %o4], %o5 /* annulled delay slot: load the next byte only when looping */
retl
/* The delay slot is the same as the next insn, this is just to make it look more awful */
2:
- add %o3, %o4, %o0
+ add %o3, %o4, %o0 /* not found (as the retl delay slot, %o4 == 0): %o0 = end; found: one past the match */
retl
- sub %o0, 1, %o0
+ sub %o0, 1, %o0 /* delay slot: back up to the matching byte */
3:
retl
nop