summaryrefslogtreecommitdiffstats
path: root/arch/mips/lib/memcpy.S
blob: 8039d21ae1066c156d05b17252200f9c9555d04c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
/* memcpy.S: MIPS optimized memcpy based upon SparcLinux code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1996 Eddie C. Dost
 *
 * derived from:
 *	e-mail between David and Eddie.
 */

#include <asm/asm.h>
#include <asm/regdef.h>
#include <asm/segment.h>

/*
 * MOVE_BIGCHUNK(src, dst, offset, w0, w1, w2, w3, w4, w5)
 *
 * Copy the eight 32-bit words found at src + offset + 0x00 .. 0x1c to
 * the same offsets from dst.  The words are moved from the top of the
 * 32-byte group downwards (0x18/0x1c first, 0x00/0x04 last).
 *
 *   src, dst  base registers; neither is modified
 *   offset    constant byte offset added to every access
 *   w0..w5    scratch registers, clobbered (w0 and w1 are used twice)
 *
 * Loads and stores are interleaved so that no store uses a register
 * written by the instruction immediately before it.  Because stores to
 * the high words are issued before the low words have been loaded, the
 * source and destination groups must not overlap.
 */
#define MOVE_BIGCHUNK(src, dst, offset, w0, w1, w2, w3, w4, w5) \
	lw	w0, (offset + 0x18)(src); \
	lw	w1, (offset + 0x1c)(src); \
	sw	w0, (offset + 0x18)(dst); \
	lw	w2, (offset + 0x10)(src); \
	sw	w1, (offset + 0x1c)(dst); \
	lw	w3, (offset + 0x14)(src); \
	sw	w2, (offset + 0x10)(dst); \
	lw	w4, (offset + 0x08)(src); \
	sw	w3, (offset + 0x14)(dst); \
	lw	w5, (offset + 0x0c)(src); \
	sw	w4, (offset + 0x08)(dst); \
	lw	w0, (offset + 0x00)(src); \
	sw	w5, (offset + 0x0c)(dst); \
	lw	w1, (offset + 0x04)(src); \
	sw	w0, (offset + 0x00)(dst); \
	sw	w1, (offset + 0x04)(dst);

	/* Alignment cases are:
	 * 1) (src&0x3)=0x0 (dst&0x3)=0x0 	can optimize
	 * 2) (src&0x3)=0x1 (dst&0x3)=0x1	can optimize
	 * 3) (src&0x3)=0x2 (dst&0x3)=0x2	can optimize
	 * 4) (src&0x3)=0x3 (dst&0x3)=0x3	can optimize
	 * 5) anything else			cannot optimize
	 */

	/* I hate MIPS register names... AIEEE, it's a SPARC! */
	/*
	 * SPARC-style aliases (o* / g*) for the MIPS register names that
	 * the included <asm/regdef.h> is expected to provide.  They are
	 * plain preprocessor substitutions; the code below is written
	 * entirely in terms of these aliases.
	 */
	/* o0-o3 -> a0-a3.  memcpy receives dst, src, len in o0, o1, o2. */
#define o0 a0
#define o1 a1
#define o2 a2
#define o3 a3
	/* o4, o5 -> t0, t1: data temporaries in the copy code. */
#define o4 t0
#define o5 t1
	/* o6 -> sp; o7 -> ra (the routine returns with "jr o7"). */
#define o6 sp
#define o7 ra
	/* g0 -> zero: used as the zero operand of compare-and-branch. */
#define g0 zero
	/* g1-g7 -> t2-t8: counters, flag bits, extra data temporaries. */
#define g1 t2
#define g2 t3
#define g3 t4
#define g4 t5
#define g5 t6
#define g6 t7
#define g7 t8

	.text
	.set	noreorder
	.set	noat

	.globl	bcopy
	.globl	amemmove
	.globl	memmove
	.globl	memcpy
	.align	2
bcopy:
	move	o3, o0
	move	o0, o1
	move	o1, o3

amemmove:
memmove:
memcpy:			/* o0=dst o1=src o2=len */
	xor	o4, o0, o1
	andi	o4, o4, 0x3
	move	g6, o0
	beq	o4, g0, can_align
	 sltiu	g7, o2, 0x8

	b	cannot_optimize
	 move	g1, o2

can_align:
	bne	g7, g0, cannot_optimize
	 move	g1, o2

	beq	o2, g0, out
	 andi	g7, o1, 0x1

hword_align:
	beq	g7, g0, word_align
	 andi	g7, o1, 0x2

	lbu	o4, 0x00(o1)
	subu	o2, o2, 0x1
	sb	o4, 0x00(o0)
	addu	o1, o1, 0x1
	addu	o0, o0, 0x1
	andi	g7, o1, 0x2

word_align:
	beq	g7, g0, dword_align
	 sltiu	g7, o2, 56
	
	lhu	o4, 0x00(o1)
	subu	o2, o2, 0x2
	sh	o4, 0x00(o0)
	sltiu	g7, o2, 56
	addu	o0, o0, 0x2
	addu	o1, o1, 0x2

dword_align:
	bne	g7, g0, do_end_words
	 move	g7, o2

	andi	g7, o1, 0x4
	beq	g7, zero, qword_align
	 andi	g7, o1, 0x8

	lw	o4, 0x00(o1)
	subu	o2, o2, 0x4
	sw	o4, 0x00(o0)
	addu	o1, o1, 0x4
	addu	o0, o0, 0x4
	andi	g7, o1, 0x8

qword_align:
	beq	g7, g0, oword_align
	 andi	g7, o1, 0x10

	lw	o4, 0x00(o1)
	lw	o5, 0x04(o1)
	subu	o2, o2, 0x8
	sw	o4, 0x00(o0)
	addu	o1, o1, 0x8
	sw	o5, 0x04(o0)
	andi	g7, o1, 0x10
	addu	o0, o0, 0x8

oword_align:
	beq	g7, g0, begin_movement
	 srl	g7, o2, 0x7

	lw	g2, 0x08(o1)
	lw	g3, 0x0c(o1)
	lw	o4, 0x00(o1)
	lw	o5, 0x04(o1)
	sw	g2, 0x08(o0)
	subu	o2, o2, 0x10
	sw	g3, 0x0c(o0)
	addu	o1, o1, 0x10
	sw	o4, 0x00(o0)
	srl	g7, o2, 0x7
	addu	o0, o0, 0x10
	sw	o5, -0x0c(o0)

begin_movement:
	beq	g7, g0, 0f
	 andi	g1, o2, 0x40

move_128bytes:
	MOVE_BIGCHUNK(o1, o0, 0x00, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o4, o5, g2, g3, g4, g5)
	subu	g7, g7, 0x01
	addu	o1, o1, 0x80
	bne	g7, g0, move_128bytes
	 addu	o0, o0, 0x80

0:
	beq	g1, g0, 1f
	 andi	g1, o2, 0x20

move_64bytes:
	MOVE_BIGCHUNK(o1, o0, 0x00, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o4, o5, g2, g3, g4, g5)
	addu	o1, o1, 0x40
	addu	o0, o0, 0x40

1:
	beq	g1, g0, do_end_words
	 andi	g7, o2, 0x1c

move_32bytes:
	MOVE_BIGCHUNK(o1, o0, 0x00, o4, o5, g2, g3, g4, g5)
	andi	g7, o2, 0x1c
	addu	o1, o1, 0x20
	addu	o0, o0, 0x20

do_end_words:
	beq	g7, g0, maybe_end_cruft
	 srl	g7, g7, 0x2

end_words:
	lw	o4, 0x00(o1)
	subu	g7, g7, 0x1
	sw	o4, 0x00(o0)
	addu	o1, o1, 0x4
	bne	g7, g0, end_words
	 addu	o0, o0, 0x4

maybe_end_cruft:
	andi	g1, o2, 0x3

cannot_optimize:
	beq	g1, g0, out
	 move	o2, g1

end_bytes:
	lbu	o4, 0x00(o1)
	subu	o2, o2, 0x1
	sb	o4, 0x00(o0)
	addu	o1, o1, 0x1
	bne	o2, g0, end_bytes
	 addu	o0, o0, 0x1

out:
	jr	o7
	 move	v0, g6