1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
|
/* memcpy.S: Mips optimized memcpy based upon SparcLinux code.
*
* Copyright(C) 1995 Linus Torvalds
* Copyright(C) 1996 David S. Miller
* Copyright(C) 1996 Eddie C. Dost
*
* derived from:
* e-mail between David and Eddie.
*/
#include <asm/asm.h>
#include <asm/regdef.h>
/*
 * MOVE_BIGCHUNK(src, dst, offset, r0, r1, r2, r3, r4, r5)
 *
 * Copy the 32 bytes at offset(src) to offset(dst) with eight lw/sw pairs.
 *   src, dst  base registers (not modified by the macro)
 *   offset    constant byte displacement added to every access
 *   r0..r5    scratch registers, all clobbered (r0 and r1 are used twice)
 *
 * Both addresses must be word-aligned because only lw/sw are used.
 * Loads and stores are interleaved so that no register is stored by the
 * instruction immediately following its load (presumably to cover the
 * load delay).
 *
 * Words are moved in the order 0x18, 0x1c, 0x10, 0x14, 0x08, 0x0c, 0x00,
 * 0x04, and stores to the upper words are issued before the lower words
 * are loaded.  The macro is therefore NOT safe when src lies less than
 * 32 bytes above dst; callers must rule that case out beforehand.
 *
 * The last body line deliberately has no line continuation, so the
 * definition ends here and cannot swallow whatever line follows it.
 */
#define MOVE_BIGCHUNK(src, dst, offset, r0, r1, r2, r3, r4, r5) \
	lw	r0, (offset + 0x18)(src); \
	lw	r1, (offset + 0x1c)(src); \
	sw	r0, (offset + 0x18)(dst); \
	lw	r2, (offset + 0x10)(src); \
	sw	r1, (offset + 0x1c)(dst); \
	lw	r3, (offset + 0x14)(src); \
	sw	r2, (offset + 0x10)(dst); \
	lw	r4, (offset + 0x08)(src); \
	sw	r3, (offset + 0x14)(dst); \
	lw	r5, (offset + 0x0c)(src); \
	sw	r4, (offset + 0x08)(dst); \
	lw	r0, (offset + 0x00)(src); \
	sw	r5, (offset + 0x0c)(dst); \
	lw	r1, (offset + 0x04)(src); \
	sw	r0, (offset + 0x00)(dst); \
	sw	r1, (offset + 0x04)(dst);
/* Alignment cases are:
* 1) (src&0x3)=0x0 (dst&0x3)=0x0 can optimize
* 2) (src&0x3)=0x1 (dst&0x3)=0x1 can optimize
* 3) (src&0x3)=0x2 (dst&0x3)=0x2 can optimize
* 4) (src&0x3)=0x3 (dst&0x3)=0x3 can optimize
* 5) anything else cannot optimize
*/
/* I hate MIPS register names... AIEEE, it's a SPARC! */
/*
 * SPARC-style register names mapped onto MIPS registers, so the body
 * below can keep the naming of the SPARC routine it was derived from.
 */
/* "out" registers: o0-o3 are the argument registers a0-a3. */
#define o0 a0
#define o1 a1
#define o2 a2
#define o3 a3
/* o4/o5 are scratch temporaries. */
#define o4 t0
#define o5 t1
/* o6 is the stack pointer, o7 the return address. */
#define o6 sp
#define o7 ra
/* "global" registers: g0 is the hard-wired zero, g1-g7 are temporaries t2-t8. */
#define g0 zero
#define g1 t2
#define g2 t3
#define g3 t4
#define g4 t5
#define g5 t6
#define g6 t7
#define g7 t8
/* Everything below is code. */
.text
/*
 * noreorder: the assembler must not reorder instructions or fill branch
 * delay slots itself; every instruction following a branch or jump in
 * this file is a hand-placed delay-slot instruction.
 */
.set noreorder
/* noat: the assembler may not use $at when expanding instructions. */
.set noat
/* Exported entry points; all four share one body. */
.globl bcopy
.globl amemmove
.globl memmove
.globl memcpy
/* Power-of-two alignment request (2^2 = 4 bytes with GNU as on MIPS). */
.align 2
/*
 * bcopy(src, dst, len)
 * memcpy(dst, src, len)
 * memmove(dst, src, len)	(also exported as amemmove)
 *
 * In:    a0 = dst, a1 = src, a2 = len	(bcopy: a0 = src, a1 = dst)
 * Out:   v0 = dst
 * Uses:  a0-a3, t0-t8, v0; no stack, no calls
 *
 * Branch delay slots are filled by hand: the instruction after every
 * branch/jump (shown with one extra space of indent) executes whether or
 * not the branch is taken.
 *
 * Overlap handling (all entry points share it):
 *   - dst == src			nothing to move, return at once
 *   - src < dst < src + len		descending byte copy
 *   - dst < src < dst + 32		ascending byte copy; the unrolled
 *					32-byte chunk copy is not safe at
 *					this distance
 *   - anything else			fast path below
 *
 * Fast path: if src and dst agree in their low two address bits and at
 * least 8 bytes are requested, src is aligned step by step (1, 2, 4, 8,
 * 16 bytes), then 128/64/32-byte chunks, words and trailing bytes are
 * copied.  Otherwise the whole request is copied byte by byte.
 */
bcopy:
	move	o3, o0			/* bcopy passes (src, dst): swap into */
	move	o0, o1			/* memcpy order (dst, src) */
	move	o1, o3
amemmove:
memmove:
memcpy:	/* o0=dst o1=src o2=len */
	beq	o0, o1, out		/* dst == src: nothing to move */
	 move	g6, o0			/* (delay) keep dst for the return value */
	subu	o4, o0, o1		/* o4 = dst - src, unsigned */
	sltu	o4, o4, o2		/* 1 iff src < dst < src + len */
	bne	o4, g0, overlap_backward
	 subu	o5, o1, o0		/* (delay) o5 = src - dst, unsigned */
	sltiu	o5, o5, 0x20		/* 1 iff src is 1..31 bytes above dst */
	bne	o5, g0, cannot_optimize	/* too close for the chunk copy */
	 move	g1, o2			/* (delay) byte count for end_bytes */
	xor	o4, o0, o1
	andi	o4, o4, 0x3		/* do the low two address bits agree? */
	beq	o4, g0, can_align
	 sltiu	g7, o2, 0x8		/* (delay) g7 = (len < 8) */
	b	cannot_optimize		/* different alignment: bytes only */
	 move	g1, o2			/* (delay) byte count for end_bytes */
can_align:
	bne	g7, g0, cannot_optimize	/* fewer than 8 bytes: bytes only */
	 move	g1, o2			/* (delay) byte count for end_bytes */
	beq	o2, g0, out		/* len >= 8 here, so never taken */
	 andi	g7, o1, 0x1		/* (delay) is src odd? */
hword_align:
	beq	g7, g0, word_align
	 andi	g7, o1, 0x2		/* (delay) src bit 1 set? */
	lbu	o4, 0x00(o1)		/* one byte brings src to an even address */
	subu	o2, o2, 0x1
	sb	o4, 0x00(o0)
	addu	o1, o1, 0x1
	addu	o0, o0, 0x1
	andi	g7, o1, 0x2		/* re-test bit 1 on the advanced src */
word_align:
	beq	g7, g0, dword_align
	 sltiu	g7, o2, 56		/* (delay) g7 = (remaining < 56) */
	lhu	o4, 0x00(o1)		/* one halfword brings src to 4-byte alignment */
	subu	o2, o2, 0x2
	sh	o4, 0x00(o0)
	sltiu	g7, o2, 56		/* recompute with the reduced length */
	addu	o0, o0, 0x2
	addu	o1, o1, 0x2
dword_align:
	/*
	 * src and dst are word-aligned from here on.  At most 3 bytes were
	 * consumed above, so at least 5 remain.
	 */
	bne	g7, g0, do_end_words	/* under 56 bytes: skip the chunk copy */
	 move	g7, o2			/* (delay) do_end_words shifts this into a word count */
	andi	g7, o1, 0x4
	beq	g7, zero, qword_align
	 andi	g7, o1, 0x8		/* (delay) src bit 3 set? */
	lw	o4, 0x00(o1)		/* one word brings src to 8-byte alignment */
	subu	o2, o2, 0x4
	sw	o4, 0x00(o0)
	addu	o1, o1, 0x4
	addu	o0, o0, 0x4
	andi	g7, o1, 0x8		/* re-test bit 3 on the advanced src */
qword_align:
	beq	g7, g0, oword_align
	 andi	g7, o1, 0x10		/* (delay) src bit 4 set? */
	lw	o4, 0x00(o1)		/* two words bring src to 16-byte alignment */
	lw	o5, 0x04(o1)
	subu	o2, o2, 0x8
	sw	o4, 0x00(o0)
	addu	o1, o1, 0x8
	sw	o5, 0x04(o0)
	andi	g7, o1, 0x10		/* re-test bit 4 on the advanced src */
	addu	o0, o0, 0x8
oword_align:
	beq	g7, g0, begin_movement
	 srl	g7, o2, 0x7		/* (delay) g7 = number of 128-byte blocks */
	lw	g2, 0x08(o1)		/* four words bring src to 32-byte alignment */
	lw	g3, 0x0c(o1)
	lw	o4, 0x00(o1)
	lw	o5, 0x04(o1)
	sw	g2, 0x08(o0)
	subu	o2, o2, 0x10
	sw	g3, 0x0c(o0)
	addu	o1, o1, 0x10
	sw	o4, 0x00(o0)
	srl	g7, o2, 0x7		/* recompute block count with the reduced length */
	addu	o0, o0, 0x10
	sw	o5, -0x0c(o0)		/* o0 already advanced by 16: this is old o0 + 4 */
begin_movement:
	/*
	 * o2 is no longer decremented below; each stage picks its share of
	 * the remaining length out of the bits of o2 (>>7, 0x40, 0x20, 0x1c, 0x3).
	 */
	beq	g7, g0, 0f		/* no full 128-byte block */
	 andi	g1, o2, 0x40		/* (delay) is a 64-byte piece pending? */
move_128bytes:
	MOVE_BIGCHUNK(o1, o0, 0x00, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o4, o5, g2, g3, g4, g5)
	subu	g7, g7, 0x01
	addu	o1, o1, 0x80
	bne	g7, g0, move_128bytes
	 addu	o0, o0, 0x80		/* (delay) advance dst on every iteration */
0:
	beq	g1, g0, 1f		/* no 64-byte piece */
	 andi	g1, o2, 0x20		/* (delay) is a 32-byte piece pending? */
move_64bytes:
	MOVE_BIGCHUNK(o1, o0, 0x00, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o4, o5, g2, g3, g4, g5)
	addu	o1, o1, 0x40
	addu	o0, o0, 0x40
1:
	beq	g1, g0, do_end_words	/* no 32-byte piece */
	 andi	g7, o2, 0x1c		/* (delay) bytes left that form whole words */
move_32bytes:
	MOVE_BIGCHUNK(o1, o0, 0x00, o4, o5, g2, g3, g4, g5)
	andi	g7, o2, 0x1c		/* the macro does not touch g7; recomputed anyway */
	addu	o1, o1, 0x20
	addu	o0, o0, 0x20
do_end_words:
	/* g7 = byte count whose whole words are still to be copied */
	beq	g7, g0, maybe_end_cruft
	 srl	g7, g7, 0x2		/* (delay) bytes -> words */
end_words:
	lw	o4, 0x00(o1)
	subu	g7, g7, 0x1
	sw	o4, 0x00(o0)
	addu	o1, o1, 0x4
	bne	g7, g0, end_words
	 addu	o0, o0, 0x4		/* (delay) advance dst on every iteration */
maybe_end_cruft:
	andi	g1, o2, 0x3		/* 0..3 trailing bytes */
cannot_optimize:
	/* g1 = number of bytes to copy one at a time, ascending */
	beq	g1, g0, out
	 move	o2, g1			/* (delay) o2 becomes the loop counter */
end_bytes:
	lbu	o4, 0x00(o1)
	subu	o2, o2, 0x1
	sb	o4, 0x00(o0)
	addu	o1, o1, 0x1
	bne	o2, g0, end_bytes
	 addu	o0, o0, 0x1		/* (delay) advance dst on every iteration */
out:
	jr	o7
	 move	v0, g6			/* (delay) return the original dst */
overlap_backward:
	/*
	 * src < dst < src + len, hence len >= 2.  Copy from the last byte
	 * down to the first so no source byte is overwritten before it has
	 * been read.
	 */
	addu	o1, o1, o2		/* one past the end of src */
	addu	o0, o0, o2		/* one past the end of dst */
backward_bytes:
	lbu	o4, -0x01(o1)
	subu	o2, o2, 0x1
	sb	o4, -0x01(o0)
	subu	o1, o1, 0x1
	bne	o2, g0, backward_bytes
	 subu	o0, o0, 0x1		/* (delay) step dst down on every iteration */
	jr	o7
	 move	v0, g6			/* (delay) return the original dst */
|