1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
;; $Id: checksum.S,v 1.1 2000/07/10 16:25:21 bjornw Exp $
;; A fast checksum routine using movem
;; Copyright (c) 1998 Bjorn Wesen/Axis Communications AB
;; csum_partial(const unsigned char * buff, int len, unsigned int sum)
.globl _csum_partial
;; ---------------------------------------------------------------------
;; unsigned int csum_partial(const unsigned char *buff, int len,
;;                           unsigned int sum)
;;
;; Adds the 16-bit words (plus a trailing odd byte, if any) of the
;; len bytes at buff into the running checksum sum.
;;
;; In:    r10 = buff, r11 = len, r12 = sum
;; Out:   r10 = updated partial checksum (a 32-bit accumulator, not
;;              necessarily reduced to 16 bits)
;; Clobb: r9, r11, r12, flags.  r0-r8 are saved and restored when the
;;        movem loop is used and are untouched otherwise.
;;
;; Every branch and ret below is followed by one delay-slot instruction
;; that executes whether or not the branch is taken.
;; ---------------------------------------------------------------------
_csum_partial:
        ;; Break-even check: below 80 bytes the register save/restore
        ;; needed by the movem loop costs more than the loop gains, so
        ;; go straight to the word loop.
        cmpu.w  80,r11
        bcs     no_movem
        nop

        ;; The movem loop loads r0-r9.  Save r0-r8 on the stack; r9 is
        ;; not preserved by this routine.
        subq    9*4,sp
        movem   r8,[sp]

        subq    10*4,r11        ; pre-decrement length for the first pass

mloop:  movem   [r10+],r9       ; read 10 longwords into r0-r9

        ;; Add the 10 longwords into the checksum.  Each ax makes the
        ;; following add include the carry of the previous one.
        add.d   r0,r12
        ax
        add.d   r1,r12
        ax
        add.d   r2,r12
        ax
        add.d   r3,r12
        ax
        add.d   r4,r12
        ax
        add.d   r5,r12
        ax
        add.d   r6,r12
        ax
        add.d   r7,r12
        ax
        add.d   r8,r12
        ax
        add.d   r9,r12

        ;; Fold the final carry back into the checksum here, so that it
        ;; does not have to be carried around to the top of the loop.
        ax
        addq    0,r12
        ax                      ; again: the add above may itself carry
        addq    0,r12

        subq    10*4,r11
        bge     mloop           ; at least 40 more bytes were available
        nop

        addq    10*4,r11        ; undo the final underflow: r11 = bytes left

        movem   [sp+],r8        ; restore r0-r8

no_movem:
        ;; Fold the 32-bit checksum into 16 bits before the word and byte
        ;; adds below, which do not propagate a carry out of bit 31.
        ;; This has to run on BOTH paths: the caller-supplied sum may use
        ;; all 32 bits even when the movem loop was skipped.  After two
        ;; rounds r12 <= 0xffff, and fewer than 80 bytes remain, so the
        ;; adds below cannot overflow 32 bits.
        ;; r9 is the only scratch register needed; it is already
        ;; clobbered by the movem loop above.
        move.d  r12,r9
        lsrq    16,r9           ; r9  = checksum >> 16
        lslq    16,r12
        lsrq    16,r12          ; r12 = checksum & 0xffff
        add.d   r9,r12          ; r12 = high half + low half (<= 0x1fffe)
        move.d  r12,r9          ; second round absorbs a carry into bit 16
        lsrq    16,r9
        lslq    16,r12
        lsrq    16,r12
        add.d   r9,r12

        cmpq    2,r11
        blt     no_words        ; fewer than 2 bytes left: no whole word
        nop

        ;; Checksum the remaining whole words.  r11 is biased by -2 so
        ;; that the bge test sees "another word follows this one"; the
        ;; addu.w sits in the delay slot and therefore runs on every
        ;; pass, including the last one.
        subq    2,r11

wloop:  subq    2,r11
        bge     wloop
        addu.w  [r10+],r12      ; delay slot: add one zero-extended word

        addq    2,r11           ; remove the bias: r11 = 0 or 1 bytes left

no_words:
        ;; See if there is one odd byte left.
        cmpq    1,r11
        beq     do_byte
        nop
        ret
        move.d  r12,r10         ; delay slot: return the checksum in r10

do_byte:
        ;; Add the final byte, zero-extended, to the checksum.
        addu.b  [r10],r12
        ret
        move.d  r12,r10         ; delay slot: return the checksum in r10
|