diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1998-03-17 22:05:47 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 1998-03-17 22:05:47 +0000 |
commit | 27cfca1ec98e91261b1a5355d10a8996464b63af (patch) | |
tree | 8e895a53e372fa682b4c0a585b9377d67ed70d0e /arch/sparc64/lib/checksum.S | |
parent | 6a76fb7214c477ccf6582bd79c5b4ccc4f9c41b1 (diff) |
Look Ma' what I found on my harddisk ...
o New faster syscalls for 2.1.x, too
o Upgrade to 2.1.89.
Don't try to run this. It's flaky as hell. But feel free to debug ...
Diffstat (limited to 'arch/sparc64/lib/checksum.S')
-rw-r--r-- | arch/sparc64/lib/checksum.S | 244 |
1 files changed, 8 insertions, 236 deletions
```diff
diff --git a/arch/sparc64/lib/checksum.S b/arch/sparc64/lib/checksum.S
index 5f35f136b..2e22ec2d8 100644
--- a/arch/sparc64/lib/checksum.S
+++ b/arch/sparc64/lib/checksum.S
@@ -35,204 +35,6 @@
 	/* I think I have an erection... Once _AGAIN_ the SunSoft
 	 * engineers are caught asleep at the keyboard, tsk tsk...
 	 */
-#define CSUMCOPY_ECACHE_LOAD(off, t0, t1, t2, t3, t4, t5, t6, t7) \
-	ldxa [%src + off + 0x00] %asi, t0; \
-	ldxa [%src + off + 0x08] %asi, t1; \
-	ldxa [%src + off + 0x10] %asi, t2; \
-	ldxa [%src + off + 0x18] %asi, t3; \
-	ldxa [%src + off + 0x20] %asi, t4; \
-	ldxa [%src + off + 0x28] %asi, t5; \
-	ldxa [%src + off + 0x30] %asi, t6; \
-	ldxa [%src + off + 0x38] %asi, t7; \
-	nop; nop; /* DO NOT TOUCH THIS!!!!! */
-
-#define CSUMCOPY_EC_STALIGNED_LDNXT(off, t0, t1, t2, t3, t4, t5, t6, t7) \
-	stx t0, [%dst + off - 0x40]; \
-	addcc %sum, t0, %sum; \
-	bcc,pt %xcc, 11f; \
-	ldxa [%src + off + 0x00] %asi, t0; \
-	add %sum, 1, %sum; \
-11:	stx t1, [%dst + off - 0x38]; \
-	addcc %sum, t1, %sum; \
-	bcc,pt %xcc, 12f; \
-	ldxa [%src + off + 0x08] %asi, t1; \
-	add %sum, 1, %sum; \
-12:	stx t2, [%dst + off - 0x30]; \
-	addcc %sum, t2, %sum; \
-	bcc,pt %xcc, 13f; \
-	ldxa [%src + off + 0x10] %asi, t2; \
-	add %sum, 1, %sum; \
-13:	stx t3, [%dst + off - 0x28]; \
-	addcc %sum, t3, %sum; \
-	bcc,pt %xcc, 14f; \
-	ldxa [%src + off + 0x18] %asi, t3; \
-	add %sum, 1, %sum; \
-14:	stx t4, [%dst + off - 0x20]; \
-	addcc %sum, t4, %sum; \
-	bcc,pt %xcc, 15f; \
-	ldxa [%src + off + 0x20] %asi, t4; \
-	add %sum, 1, %sum; \
-15:	stx t5, [%dst + off - 0x18]; \
-	addcc %sum, t5, %sum; \
-	bcc,pt %xcc, 16f; \
-	ldxa [%src + off + 0x28] %asi, t5; \
-	add %sum, 1, %sum; \
-16:	stx t6, [%dst + off - 0x10]; \
-	addcc %sum, t6, %sum; \
-	bcc,pt %xcc, 17f; \
-	ldxa [%src + off + 0x30] %asi, t6; \
-	add %sum, 1, %sum; \
-17:	stx t7, [%dst + off - 0x08]; \
-	addcc %sum, t7, %sum; \
-	bcc,pt %xcc, 18f; \
-	ldxa [%src + off + 0x38] %asi, t7; \
-	add %sum, 1, %sum; \
-18:
-
-#define CSUMCOPY_EC_STUNALIGN_LDNXT(off, t0, t1, t2, t3, t4, t5, t6, t7) \
-	stw t0, [%dst + off - 0x3c]; \
-	addcc %sum, t0, %sum; \
-	srlx t0, 32, t0; \
-	stw t0, [%dst + off - 0x40]; \
-	bcc,pt %xcc, 21f; \
-	ldxa [%src + off + 0x00] %asi, t0; \
-	add %sum, 1, %sum; \
-21:	stw t1, [%dst + off - 0x34]; \
-	addcc %sum, t1, %sum; \
-	srlx t1, 32, t1; \
-	stw t1, [%dst + off - 0x38]; \
-	bcc,pt %xcc, 22f; \
-	ldxa [%src + off + 0x08] %asi, t1; \
-	add %sum, 1, %sum; \
-22:	stw t2, [%dst + off - 0x2c]; \
-	addcc %sum, t2, %sum; \
-	srlx t2, 32, t2; \
-	stw t2, [%dst + off - 0x30]; \
-	bcc,pt %xcc, 23f; \
-	ldxa [%src + off + 0x10] %asi, t2; \
-	add %sum, 1, %sum; \
-23:	stw t3, [%dst + off - 0x24]; \
-	addcc %sum, t3, %sum; \
-	srlx t3, 32, t3; \
-	stw t3, [%dst + off - 0x28]; \
-	bcc,pt %xcc, 24f; \
-	ldxa [%src + off + 0x18] %asi, t3; \
-	add %sum, 1, %sum; \
-24:	stw t4, [%dst + off - 0x1c]; \
-	addcc %sum, t4, %sum; \
-	srlx t4, 32, t4; \
-	stw t4, [%dst + off - 0x20]; \
-	bcc,pt %xcc, 25f; \
-	ldxa [%src + off + 0x20] %asi, t4; \
-	add %sum, 1, %sum; \
-25:	stw t5, [%dst + off - 0x14]; \
-	addcc %sum, t5, %sum; \
-	srlx t5, 32, t5; \
-	stw t5, [%dst + off - 0x18]; \
-	bcc,pt %xcc, 26f; \
-	ldxa [%src + off + 0x28] %asi, t5; \
-	add %sum, 1, %sum; \
-26:	stw t6, [%dst + off - 0x0c]; \
-	addcc %sum, t6, %sum; \
-	srlx t6, 32, t6; \
-	stw t6, [%dst + off - 0x10]; \
-	bcc,pt %xcc, 27f; \
-	ldxa [%src + off + 0x30] %asi, t6; \
-	add %sum, 1, %sum; \
-27:	stw t7, [%dst + off - 0x04]; \
-	addcc %sum, t7, %sum; \
-	srlx t7, 32, t7; \
-	stw t7, [%dst + off - 0x08]; \
-	bcc,pt %xcc, 28f; \
-	ldxa [%src + off + 0x38] %asi, t7; \
-	add %sum, 1, %sum; \
-28:
-
-#define CSUMCOPY_EC_STALIGNED(off, t0, t1, t2, t3, t4, t5, t6, t7) \
-	addcc %sum, t0, %sum; \
-	bcc,pt %xcc, 31f; \
-	stx t0, [%dst + off + 0x00]; \
-	add %sum, 1, %sum; \
-31:	addcc %sum, t1, %sum; \
-	bcc,pt %xcc, 32f; \
-	stx t1, [%dst + off + 0x08]; \
-	add %sum, 1, %sum; \
-32:	addcc %sum, t2, %sum; \
-	bcc,pt %xcc, 33f; \
-	stx t2, [%dst + off + 0x10]; \
-	add %sum, 1, %sum; \
-33:	addcc %sum, t3, %sum; \
-	bcc,pt %xcc, 34f; \
-	stx t3, [%dst + off + 0x18]; \
-	add %sum, 1, %sum; \
-34:	addcc %sum, t4, %sum; \
-	bcc,pt %xcc, 35f; \
-	stx t4, [%dst + off + 0x20]; \
-	add %sum, 1, %sum; \
-35:	addcc %sum, t5, %sum; \
-	bcc,pt %xcc, 36f; \
-	stx t5, [%dst + off + 0x28]; \
-	add %sum, 1, %sum; \
-36:	addcc %sum, t6, %sum; \
-	bcc,pt %xcc, 37f; \
-	stx t6, [%dst + off + 0x30]; \
-	add %sum, 1, %sum; \
-37:	addcc %sum, t7, %sum; \
-	bcc,pt %xcc, 38f; \
-	stx t7, [%dst + off + 0x38]; \
-	add %sum, 1, %sum; \
-38:
-
-#define CSUMCOPY_EC_STUNALIGN(off, t0, t1, t2, t3, t4, t5, t6, t7) \
-	stw t0, [%dst + off + 0x04]; \
-	addcc %sum, t0, %sum; \
-	srlx t0, 32, t0; \
-	bcc,pt %xcc, 41f; \
-	stw t0, [%dst + off + 0x00]; \
-	add %sum, 1, %sum; \
-41:	stw t1, [%dst + off + 0x0c]; \
-	addcc %sum, t1, %sum; \
-	srlx t1, 32, t1; \
-	bcc,pt %xcc, 42f; \
-	stw t1, [%dst + off + 0x08]; \
-	add %sum, 1, %sum; \
-42:	stw t2, [%dst + off + 0x14]; \
-	addcc %sum, t2, %sum; \
-	srlx t2, 32, t2; \
-	bcc,pt %xcc, 43f; \
-	stw t2, [%dst + off + 0x10]; \
-	add %sum, 1, %sum; \
-43:	stw t3, [%dst + off + 0x1c]; \
-	addcc %sum, t3, %sum; \
-	srlx t3, 32, t3; \
-	bcc,pt %xcc, 44f; \
-	stw t3, [%dst + off + 0x18]; \
-	add %sum, 1, %sum; \
-44:	stw t4, [%dst + off + 0x24]; \
-	addcc %sum, t4, %sum; \
-	srlx t4, 32, t4; \
-	bcc,pt %xcc, 45f; \
-	stw t4, [%dst + off + 0x20]; \
-	add %sum, 1, %sum; \
-45:	stw t5, [%dst + off + 0x2c]; \
-	addcc %sum, t5, %sum; \
-	srlx t5, 32, t5; \
-	bcc,pt %xcc, 46f; \
-	stw t5, [%dst + off + 0x28]; \
-	add %sum, 1, %sum; \
-46:	stw t6, [%dst + off + 0x34]; \
-	addcc %sum, t6, %sum; \
-	srlx t6, 32, t6; \
-	bcc,pt %xcc, 47f; \
-	stw t6, [%dst + off + 0x30]; \
-	add %sum, 1, %sum; \
-47:	stw t7, [%dst + off + 0x3c]; \
-	addcc %sum, t7, %sum; \
-	srlx t7, 32, t7; \
-	bcc,pt %xcc, 48f; \
-	stw t7, [%dst + off + 0x38]; \
-	add %sum, 1, %sum; \
-48:
 
 #define CSUMCOPY_LASTCHUNK(off, t0, t1) \
 	ldxa [%src - off - 0x08] %asi, t0; \
@@ -296,6 +98,7 @@ cc_end_cruft:
 	add %sum, 1, %sum ! IEU1
 
 cc_fixit:
+	cmp %len, 6 ! IEU1 Group
 	bl,a,pn %icc, ccte ! CTI
 	andcc %len, 0xf, %g7 ! IEU1 Group
 	andcc %src, 2, %g0 ! IEU1 Group
@@ -316,17 +119,17 @@ cc_fixit:
 	sll %g3, 16, %g3 ! IEU0 Group
 	srl %sum, 16, %sum ! IEU0 Group
 	or %g3, %sum, %sum ! IEU0 Group (regdep)
-1:	be,pt %icc, cc_dword_aligned ! CTI
-	andn %len, 0xff, %g2 ! IEU1
+1:	be,pt %icc, ccmerge ! CTI
+	andcc %len, 0xf0, %g1 ! IEU1
 	lduwa [%src + 0x00] %asi, %g4 ! Load Group
 	sub %len, 4, %len ! IEU0
 	add %src, 4, %src ! IEU1
 	add %dst, 4, %dst ! IEU0 Group
 	addcc %g4, %sum, %sum ! IEU1 Group + 1 bubble
 	stw %g4, [%dst - 0x4] ! Store
-	bcc,pt %xcc, cc_dword_aligned ! CTI
-	andn %len, 0xff, %g2 ! IEU0 Group
-	b,pt %xcc, cc_dword_aligned ! CTI 4 clocks (mispredict)
+	bcc,pt %xcc, ccmerge ! CTI
+	andcc %len, 0xf0, %g1 ! IEU1 Group
+	b,pt %xcc, ccmerge ! CTI 4 clocks (mispredict)
 	add %sum, 1, %sum ! IEU0
 
 	.align 32
@@ -342,26 +145,8 @@ csum_partial_copy_sparc64: /* %o0=src, %o1=dest, %o2=len, %o3=sum */
 	cmp %len, 256 ! IEU1 Group
 	bgeu,pt %icc, csum_partial_copy_vis ! CTI
 	andcc %src, 7, %g0 ! IEU1 Group
-	be,pt %icc, cc_dword_aligned ! CTI
-	andn %len, 0xff, %g2 ! IEU0
-	b,pt %xcc, cc_fixit ! CTI Group
-	cmp %len, 6 ! IEU1
-cc_dword_aligned:
-	brz,pn %g2, 3f ! CTI Group
-	andcc %dst, 4, %g0 ! IEU1 Group (brz uses IEU1)
-	be,pn %icc, ccdbl + 4 ! CTI
-5:	CSUMCOPY_ECACHE_LOAD( 0x00,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STUNALIGN_LDNXT(0x40,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STUNALIGN_LDNXT(0x80,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STUNALIGN_LDNXT(0xc0,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STUNALIGN( 0xc0,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-10:
-	sub %len, 256, %len ! IEU0 Group
-	add %src, 256, %src ! IEU1
-	andncc %len, 0xff, %g0 ! IEU1 Group
-	bne,pt %icc, 5b ! CTI
-	add %dst, 256, %dst ! IEU0
-3:	andcc %len, 0xf0, %g1 ! IEU1 Group
+	bne,pn %icc, cc_fixit ! CTI
+	andcc %len, 0xf0, %g1 ! IEU1 Group
 ccmerge:be,pn %icc, ccte ! CTI
 	andcc %len, 0xf, %g7 ! IEU1 Group
 	sll %g1, 2, %o4 ! IEU0
@@ -396,19 +181,6 @@ ccfold: sllx %sum, 32, %o0 ! IEU0 Group
 	add %o0, 1, %o0 ! IEU1 4 clocks (mispredict)
 1:	retl ! CTI Group brk forced
 	sllx %g4, 32,%g4 ! IEU0 Group
-ccdbl:	CSUMCOPY_ECACHE_LOAD( 0x00,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STALIGNED_LDNXT(0x40,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STALIGNED_LDNXT(0x80,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STALIGNED_LDNXT(0xc0,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-	CSUMCOPY_EC_STALIGNED( 0xc0,%o4,%o5,%g2,%g3,%g4,%g5,%g1,%g7)
-11:
-	sub %len, 256, %len ! IEU0 Group
-	add %src, 256, %src ! IEU1
-	andncc %len, 0xff, %g0 ! IEU1 Group
-	bne,pt %icc, ccdbl ! CTI
-	add %dst, 256, %dst ! IEU0
-	b,pt %xcc, ccmerge ! CTI Group
-	andcc %len, 0xf0, %g1 ! IEU1
 ccslow:	mov 0, %g5
 	brlez,pn %len, 4f
```