Diffstat (limited to 'include/asm-ia64/xor.h')
-rw-r--r--  include/asm-ia64/xor.h | 283
1 file changed, 283 insertions(+), 0 deletions(-)
diff --git a/include/asm-ia64/xor.h b/include/asm-ia64/xor.h
new file mode 100644
index 000000000..28aca667c
--- /dev/null
+++ b/include/asm-ia64/xor.h
@@ -0,0 +1,283 @@
+/*
+ * include/asm-ia64/xor.h
+ *
+ * Optimized RAID-5 checksumming functions for IA-64.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *);
+extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *);
+extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *, unsigned long *);
+extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *, unsigned long *, unsigned long *);
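+
+/*
+ * For reference, xor_ia64_2() computes the same result as this portable
+ * C sketch (a minimal illustration only; the hypothetical xor_ref_2()
+ * below is not part of the kernel).  "bytes" is assumed to be a
+ * multiple of 8, matching the "shr in0 = in0, 3" word-count setup:
+ *
+ *     static void xor_ref_2(unsigned long bytes,
+ *                           unsigned long *p1, unsigned long *p2)
+ *     {
+ *             unsigned long i;
+ *
+ *             for (i = 0; i < bytes / 8; i++)
+ *                     p1[i] ^= p2[i];
+ *     }
+ *
+ * The 3-, 4- and 5-way routines fold the extra source buffers into p1
+ * in the same single pass.  Note that the asm() statement below spans
+ * multiple lines inside one string literal, a GCC extension accepted
+ * by compilers of this era.
+ */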
+
+asm ("
+ .text
+
+ // Assume L2 memory latency of 6 cycles.
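+
+ // Each loop below is modulo-scheduled: ar.ec = 6 + 2 allots six
+ // stages to cover the load latency, one for the xor, and one for
+ // the store.  Loads issue in stage p[0], the xor consumes its
+ // operands in p[6], and the result is stored from the rotating
+ // d[] register one stage later in p[6+1].  "mov pr.rot = 1 << 16"
+ // sets p16 to prime the predicate pipeline; br.ctop rotates the
+ // registers and predicates each iteration until the loop drains.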
+
+ .proc xor_ia64_2
+xor_ia64_2:
+ .prologue
+ .fframe 0
+ { .mii
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 3, 0, 13, 16
+ .save ar.lc, r30
+ mov r30 = ar.lc
+ .save pr, r29
+ mov r29 = pr
+ ;;
+ }
+ .body
+ { .mii
+ mov r8 = in1
+ mov ar.ec = 6 + 2
+ shr in0 = in0, 3
+ ;;
+ }
+ { .mmi
+ adds in0 = -1, in0
+ mov r16 = in1
+ mov r17 = in2
+ ;;
+ }
+ { .mii
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16
+ ;;
+ }
+ .rotr s1[6+1], s2[6+1], d[2]
+ .rotp p[6+2]
+0: { .mmi
+(p[0]) ld8.nta s1[0] = [r16], 8
+(p[0]) ld8.nta s2[0] = [r17], 8
+(p[6]) xor d[0] = s1[6], s2[6]
+ }
+ { .mfb
+(p[6+1]) st8.nta [r8] = d[1], 8
+ nop.f 0
+ br.ctop.dptk.few 0b
+ ;;
+ }
+ { .mii
+ mov ar.lc = r30
+ mov pr = r29, -1
+ }
+ { .bbb
+ br.ret.sptk.few rp
+ }
+ .endp xor_ia64_2
+
+ .proc xor_ia64_3
+xor_ia64_3:
+ .prologue
+ .fframe 0
+ { .mii
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 4, 0, 20, 24
+ .save ar.lc, r30
+ mov r30 = ar.lc
+ .save pr, r29
+ mov r29 = pr
+ ;;
+ }
+ .body
+ { .mii
+ mov r8 = in1
+ mov ar.ec = 6 + 2
+ shr in0 = in0, 3
+ ;;
+ }
+ { .mmi
+ adds in0 = -1, in0
+ mov r16 = in1
+ mov r17 = in2
+ ;;
+ }
+ { .mii
+ mov r18 = in3
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16
+ ;;
+ }
+ .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
+ .rotp p[6+2]
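+ // With three sources the second xor (d ^= s3) depends on the
+ // first, so it issues after the stop bit, still in stage p[6].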
+0: { .mmi
+(p[0]) ld8.nta s1[0] = [r16], 8
+(p[0]) ld8.nta s2[0] = [r17], 8
+(p[6]) xor d[0] = s1[6], s2[6]
+ ;;
+ }
+ { .mmi
+(p[0]) ld8.nta s3[0] = [r18], 8
+(p[6+1]) st8.nta [r8] = d[1], 8
+(p[6]) xor d[0] = d[0], s3[6]
+ }
+ { .bbb
+ br.ctop.dptk.few 0b
+ ;;
+ }
+ { .mii
+ mov ar.lc = r30
+ mov pr = r29, -1
+ }
+ { .bbb
+ br.ret.sptk.few rp
+ }
+ .endp xor_ia64_3
+
+ .proc xor_ia64_4
+xor_ia64_4:
+ .prologue
+ .fframe 0
+ { .mii
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 5, 0, 27, 32
+ .save ar.lc, r30
+ mov r30 = ar.lc
+ .save pr, r29
+ mov r29 = pr
+ ;;
+ }
+ .body
+ { .mii
+ mov r8 = in1
+ mov ar.ec = 6 + 2
+ shr in0 = in0, 3
+ ;;
+ }
+ { .mmi
+ adds in0 = -1, in0
+ mov r16 = in1
+ mov r17 = in2
+ ;;
+ }
+ { .mii
+ mov r18 = in3
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16
+ }
+ { .mfb
+ mov r19 = in4
+ ;;
+ }
+ .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
+ .rotp p[6+2]
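+ // r20 is static (non-rotating) scratch: s3 ^ s4 is produced and
+ // consumed in the same stage p[6], separated only by a stop bit,
+ // so the partial result never lives across a register rotation.
+ // (xor_ia64_5 uses r21 the same way.)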
+0: { .mmi
+(p[0]) ld8.nta s1[0] = [r16], 8
+(p[0]) ld8.nta s2[0] = [r17], 8
+(p[6]) xor d[0] = s1[6], s2[6]
+ }
+ { .mmi
+(p[0]) ld8.nta s3[0] = [r18], 8
+(p[0]) ld8.nta s4[0] = [r19], 8
+(p[6]) xor r20 = s3[6], s4[6]
+ ;;
+ }
+ { .mib
+(p[6+1]) st8.nta [r8] = d[1], 8
+(p[6]) xor d[0] = d[0], r20
+ br.ctop.dptk.few 0b
+ ;;
+ }
+ { .mii
+ mov ar.lc = r30
+ mov pr = r29, -1
+ }
+ { .bbb
+ br.ret.sptk.few rp
+ }
+ .endp xor_ia64_4
+
+ .proc xor_ia64_5
+xor_ia64_5:
+ .prologue
+ .fframe 0
+ { .mii
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 6, 0, 34, 40
+ .save ar.lc, r30
+ mov r30 = ar.lc
+ .save pr, r29
+ mov r29 = pr
+ ;;
+ }
+ .body
+ { .mii
+ mov r8 = in1
+ mov ar.ec = 6 + 2
+ shr in0 = in0, 3
+ ;;
+ }
+ { .mmi
+ adds in0 = -1, in0
+ mov r16 = in1
+ mov r17 = in2
+ ;;
+ }
+ { .mii
+ mov r18 = in3
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16
+ }
+ { .mib
+ mov r19 = in4
+ mov r20 = in5
+ ;;
+ }
+ .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
+ .rotp p[6+2]
+0: { .mmi
+(p[0]) ld8.nta s1[0] = [r16], 8
+(p[0]) ld8.nta s2[0] = [r17], 8
+(p[6]) xor d[0] = s1[6], s2[6]
+ }
+ { .mmi
+(p[0]) ld8.nta s3[0] = [r18], 8
+(p[0]) ld8.nta s4[0] = [r19], 8
+(p[6]) xor r21 = s3[6], s4[6]
+ ;;
+ }
+ { .mmi
+(p[0]) ld8.nta s5[0] = [r20], 8
+(p[6+1]) st8.nta [r8] = d[1], 8
+(p[6]) xor d[0] = d[0], r21
+ ;;
+ }
+ { .mfb
+(p[6]) xor d[0] = d[0], s5[6]
+ nop.f 0
+ br.ctop.dptk.few 0b
+ ;;
+ }
+ { .mii
+ mov ar.lc = r30
+ mov pr = r29, -1
+ }
+ { .bbb
+ br.ret.sptk.few rp
+ }
+ .endp xor_ia64_5
+");
+
+static struct xor_block_template xor_block_ia64 = {
+ .name = "ia64",
+ .do_2 = xor_ia64_2,
+ .do_3 = xor_ia64_3,
+ .do_4 = xor_ia64_4,
+ .do_5 = xor_ia64_5,
+};
+
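+/*
+ * XOR_TRY_TEMPLATES is expanded by the generic RAID xor code
+ * (drivers/md/xor.c) at calibration time: xor_speed() benchmarks each
+ * registered template and the fastest one is chosen for checksumming.
+ * On ia64 only this single template is offered.
+ */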
+#define XOR_TRY_TEMPLATES xor_speed(&xor_block_ia64)