diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-11-23 02:00:47 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-11-23 02:00:47 +0000 |
commit | 06615f62b17d7de6e12d2f5ec6b88cf30af08413 (patch) | |
tree | 8766f208847d4876a6db619aebbf54d53b76eb44 /arch/ia64/lib | |
parent | fa9bdb574f4febb751848a685d9a9017e04e1d53 (diff) |
Merge with Linux 2.4.0-test10.
Diffstat (limited to 'arch/ia64/lib')
-rw-r--r-- | arch/ia64/lib/Makefile | 49 | ||||
-rw-r--r-- | arch/ia64/lib/idiv32.S | 83 | ||||
-rw-r--r-- | arch/ia64/lib/idiv64.S (renamed from arch/ia64/lib/idiv.S) | 64 | ||||
-rw-r--r-- | arch/ia64/lib/io.c | 4 |
4 files changed, 146 insertions, 54 deletions
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile index 318e314cc..90e697179 100644 --- a/arch/ia64/lib/Makefile +++ b/arch/ia64/lib/Makefile @@ -3,30 +3,49 @@ # .S.o: - $(CC) $(AFLAGS) -c $< -o $@ + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c $< -o $@ L_TARGET = lib.a -L_OBJS = __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ - checksum.o clear_page.o csum_partial_copy.o copy_page.o \ - copy_user.o clear_user.o memcpy.o memset.o strncpy_from_user.o \ - strlen.o strlen_user.o strnlen_user.o \ +L_OBJS = __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ + __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ + checksum.o clear_page.o csum_partial_copy.o copy_page.o \ + copy_user.o clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \ flush.o do_csum.o +ifneq ($(CONFIG_ITANIUM_ASTEP_SPECIFIC),y) + L_OBJS += memcpy.o memset.o strlen.o +endif + LX_OBJS = io.o -IGNORE_FLAGS_OBJS = __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o +IGNORE_FLAGS_OBJS = __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ + __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o -include $(TOPDIR)/Rules.make +$(L_TARGET): + +__divdi3.o: idiv64.S + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -o $@ $< + +__udivdi3.o: idiv64.S + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DUNSIGNED -c -o $@ $< -__divdi3.o: idiv.S - $(CC) $(AFLAGS) -c -o $@ $< +__moddi3.o: idiv64.S + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -c -o $@ $< -__udivdi3.o: idiv.S - $(CC) $(AFLAGS) -c -DUNSIGNED -c -o $@ $< +__umoddi3.o: idiv64.S + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -DUNSIGNED -c -o $@ $< -__moddi3.o: idiv.S - $(CC) $(AFLAGS) -c -DMODULO -c -o $@ $< +__divsi3.o: idiv32.S + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -o $@ $< -__umoddi3.o: idiv.S - $(CC) $(AFLAGS) -c -DMODULO -DUNSIGNED -c -o $@ $< +__udivsi3.o: idiv32.S + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DUNSIGNED -c -o $@ $< + +__modsi3.o: idiv32.S + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -c -o $@ $< + +__umodsi3.o: idiv32.S + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -DUNSIGNED -c -o $@ $< + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/lib/idiv32.S b/arch/ia64/lib/idiv32.S new file mode 100644 index 000000000..13f4608c3 --- /dev/null +++ b/arch/ia64/lib/idiv32.S @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * 32-bit integer division. + * + * This code is based on the application note entitled "Divide, Square Root + * and Remainder Algorithms for the IA-64 Architecture". This document + * is available as Intel document number 248725-002 or via the web at + * http://developer.intel.com/software/opensource/numerics/ + * + * For more details on the theory behind these algorithms, see "IA-64 + * and Elementary Functions" by Peter Markstein; HP Professional Books + * (http://www.hp.com/go/retailbooks/) + */ + +#include <asm/asmmacro.h> + +#ifdef MODULO +# define OP mod +#else +# define OP div +#endif + +#ifdef UNSIGNED +# define SGN u +# define EXTEND zxt4 +# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b +# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b +#else +# define SGN +# define EXTEND sxt4 +# define INT_TO_FP(a,b) fcvt.xf a=b +# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b +#endif + +#define PASTE1(a,b) a##b +#define PASTE(a,b) PASTE1(a,b) +#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3)) + +GLOBAL_ENTRY(NAME) + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias) + EXTEND in0 = in0 // in0 = a + EXTEND in1 = in1 // in1 = b + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 +#ifdef MODULO + sub in1 = r0, in1 // in1 = -b +#endif + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. + INT_TO_FP(f8, f8) + INT_TO_FP(f9, f9) + ;; + setf.exp f7 = r2 // f7 = 2^-34 + frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b) + ;; +(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0 +(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1 + ;; +#ifdef MODULO + setf.sig f9 = in1 // f9 = -b +#endif +(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0 +(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34 + ;; +#ifdef MODULO + setf.sig f7 = in0 +#endif +(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1 + ;; + FP_TO_INT(f6, f6) // q = trunc(q2) + ;; +#ifdef MODULO + xma.l f6 = f6, f9, f7 // r = q*(-b) + a + ;; +#endif + getf.sig r8 = f6 // transfer result to result register + br.ret.sptk rp +END(NAME) diff --git a/arch/ia64/lib/idiv.S b/arch/ia64/lib/idiv64.S index da96863d3..db7d2284e 100644 --- a/arch/ia64/lib/idiv.S +++ b/arch/ia64/lib/idiv64.S @@ -1,25 +1,21 @@ /* - * Integer division routine. - * * Copyright (C) 1999-2000 Hewlett-Packard Co * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> - */ - -#include <asm/asmmacro.h> - -/* - * Compute a 64-bit unsigned integer quotient. * - * Use reciprocal approximation and Newton-Raphson iteration to compute the - * quotient. frcpa gives 8.6 significant bits, so we need 3 iterations - * to get more than the 64 bits of precision that we need for DImode. + * 64-bit integer division. * - * Must use max precision for the reciprocal computations to get 64 bits of - * precision. + * This code is based on the application note entitled "Divide, Square Root + * and Remainder Algorithms for the IA-64 Architecture". This document + * is available as Intel document number 248725-002 or via the web at + * http://developer.intel.com/software/opensource/numerics/ * - * r32 holds the dividend. r33 holds the divisor. + * For more details on the theory behind these algorithms, see "IA-64 + * and Elementary Functions" by Peter Markstein; HP Professional Books + * (http://www.hp.com/go/retailbooks/) */ +#include <asm/asmmacro.h> + #ifdef MODULO # define OP mod #else @@ -59,40 +55,38 @@ GLOBAL_ENTRY(NAME) UNW(.body) INT_TO_FP(f9, f9) ;; - frcpa.s1 f17, p6 = f8, f9 // y = frcpa(b) + frcpa.s1 f17, p6 = f8, f9 // y0 = frcpa(b) ;; - /* - * This is the magic algorithm described in Section 8.6.2 of "IA-64 - * and Elementary Functions" by Peter Markstein; HP Professional Books - * (http://www.hp.com/go/retailbooks/) - */ -(p6) fmpy.s1 f7 = f8, f17 // q = a*y -(p6) fnma.s1 f6 = f9, f17, f1 // e = -b*y + 1 +(p6) fmpy.s1 f7 = f8, f17 // q0 = a*y0 +(p6) fnma.s1 f6 = f9, f17, f1 // e0 = -b*y0 + 1 ;; -(p6) fma.s1 f16 = f7, f6, f7 // q1 = q*e + q -(p6) fmpy.s1 f7 = f6, f6 // e1 = e*e +(p6) fma.s1 f16 = f7, f6, f7 // q1 = q0*e0 + q0 +(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0 ;; +#ifdef MODULO + sub in1 = r0, in1 // in1 = -b +#endif (p6) fma.s1 f16 = f16, f7, f16 // q2 = q1*e1 + q1 -(p6) fma.s1 f6 = f17, f6, f17 // y1 = y*e + y +(p6) fma.s1 f6 = f17, f6, f17 // y1 = y0*e0 + y0 ;; (p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1 (p6) fnma.s1 f7 = f9, f16, f8 // r = -b*q2 + a ;; -(p6) fma.s1 f17 = f7, f6, f16 // q3 = r*y2 + q2 - ;; #ifdef MODULO - FP_TO_INT(f17, f17) // round quotient to an unsigned integer - ;; - INT_TO_FP(f17, f17) // renormalize - ;; - fnma.s1 f17 = f17, f9, f8 // compute remainder - ;; + setf.sig f8 = in0 // f8 = a + setf.sig f9 = in1 // f9 = -b #endif +(p6) fma.s1 f17 = f7, f6, f16 // q3 = r*y2 + q2 + ;; UNW(.restore sp) ldf.fill f16 = [sp], 16 - FP_TO_INT(f8, f17) // round result to an (unsigned) integer + FP_TO_INT(f17, f17) // q = trunc(q3) ;; +#ifdef MODULO + xma.l f17 = f17, f9, f8 // r = q*(-b) + a + ;; +#endif + getf.sig r8 = f17 // transfer result to result register ldf.fill f17 = [sp] - getf.sig r8 = f8 // transfer result to result register br.ret.sptk rp END(NAME) diff --git a/arch/ia64/lib/io.c b/arch/ia64/lib/io.c index 466335172..baa408e01 100644 --- a/arch/ia64/lib/io.c +++ b/arch/ia64/lib/io.c @@ -1,4 +1,3 @@ -#include <linux/module.h> #include <linux/types.h> #include <asm/io.h> @@ -49,6 +48,3 @@ __ia64_memset_c_io (unsigned long dst, unsigned long c, long count) } } -EXPORT_SYMBOL(__ia64_memcpy_fromio); -EXPORT_SYMBOL(__ia64_memcpy_toio); -EXPORT_SYMBOL(__ia64_memset_c_io); |