diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1998-03-17 22:05:47 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 1998-03-17 22:05:47 +0000 |
commit | 27cfca1ec98e91261b1a5355d10a8996464b63af (patch) | |
tree | 8e895a53e372fa682b4c0a585b9377d67ed70d0e /arch/sparc64/math-emu | |
parent | 6a76fb7214c477ccf6582bd79c5b4ccc4f9c41b1 (diff) |
Look Ma' what I found on my harddisk ...
o New faster syscalls for 2.1.x, too
o Upgrade to 2.1.89.
Don't try to run this. It's flaky as hell. But feel free to debug ...
Diffstat (limited to 'arch/sparc64/math-emu')
49 files changed, 2877 insertions, 0 deletions
diff --git a/arch/sparc64/math-emu/.cvsignore b/arch/sparc64/math-emu/.cvsignore new file mode 100644 index 000000000..857dd22e9 --- /dev/null +++ b/arch/sparc64/math-emu/.cvsignore @@ -0,0 +1,2 @@ +.depend +.*.flags diff --git a/arch/sparc64/math-emu/Makefile b/arch/sparc64/math-emu/Makefile new file mode 100644 index 000000000..ea816d98e --- /dev/null +++ b/arch/sparc64/math-emu/Makefile @@ -0,0 +1,33 @@ +# +# Makefile for the FPU Quad (long double) instruction emulation. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +O_TARGET := math-emu.o +O_OBJS := math.o fabsq.o faddq.o fdivq.o fdmulq.o fitoq.o \ + fmovq.o fmulq.o fnegq.o fqtoi.o fqtox.o fsubq.o \ + fxtoq.o fdtoq.o fstoq.o fqtos.o fqtod.o fsqrtq.o \ + fcmpq.o fcmpeq.o udivmodti4.o \ + fsqrts.o fsqrtd.o fadds.o faddd.o fsubs.o fsubd.o \ + fmuls.o fmuld.o fdivs.o fdivd.o fsmuld.o \ + fstoi.o fdtoi.o fstox.o fdtox.o fstod.o fdtos.o + +ifeq ($(CONFIG_MATHEMU),m) +M_OBJS := $(O_TARGET) +endif + +.S.s: + $(CPP) -D__ASSEMBLY__ -ansi $< -o $*.s + +.S.o: + $(CC) -D__ASSEMBLY__ -ansi -c $< -o $*.o + +ifneq ($(CONFIG_MATHEMU),y) +do_it_all: +endif + +include $(TOPDIR)/Rules.make diff --git a/arch/sparc64/math-emu/double.h b/arch/sparc64/math-emu/double.h new file mode 100644 index 000000000..b68d76790 --- /dev/null +++ b/arch/sparc64/math-emu/double.h @@ -0,0 +1,129 @@ +/* + * Definitions for IEEE Double Precision + */ + +#if _FP_W_TYPE_SIZE < 32 +#error "Here's a nickle kid. Go buy yourself a real computer." +#endif + +#if _FP_W_TYPE_SIZE < 64 +#define _FP_FRACTBITS_D (2 * _FP_W_TYPE_SIZE) +#else +#define _FP_FRACTBITS_D _FP_W_TYPE_SIZE +#endif + +#define _FP_FRACBITS_D 53 +#define _FP_FRACXBITS_D (_FP_FRACTBITS_D - _FP_FRACBITS_D) +#define _FP_WFRACBITS_D (_FP_WORKBITS + _FP_FRACBITS_D) +#define _FP_WFRACXBITS_D (_FP_FRACTBITS_D - _FP_WFRACBITS_D) +#define _FP_EXPBITS_D 11 +#define _FP_EXPBIAS_D 1023 +#define _FP_EXPMAX_D 2047 + +#define _FP_QNANBIT_D \ + ((_FP_W_TYPE)1 << (_FP_FRACBITS_D-2) % _FP_W_TYPE_SIZE) +#define _FP_IMPLBIT_D \ + ((_FP_W_TYPE)1 << (_FP_FRACBITS_D-1) % _FP_W_TYPE_SIZE) +#define _FP_OVERFLOW_D \ + ((_FP_W_TYPE)1 << _FP_WFRACBITS_D % _FP_W_TYPE_SIZE) + +#if _FP_W_TYPE_SIZE < 64 + +union _FP_UNION_D +{ + double flt; + struct { +#if __BYTE_ORDER == __BIG_ENDIAN + unsigned sign : 1; + unsigned exp : _FP_EXPBITS_D; + unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE; + unsigned frac0 : _FP_W_TYPE_SIZE; +#else + unsigned frac0 : _FP_W_TYPE_SIZE; + unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE; + unsigned exp : _FP_EXPBITS_D; + unsigned sign : 1; +#endif + } bits __attribute__((packed)); +}; + +#define FP_DECL_D(X) _FP_DECL(2,X) +#define FP_UNPACK_RAW_D(X,val) _FP_UNPACK_RAW_2(D,X,val) +#define FP_PACK_RAW_D(val,X) _FP_PACK_RAW_2(D,val,X) + +#define FP_UNPACK_D(X,val) \ + do { \ + _FP_UNPACK_RAW_2(D,X,val); \ + _FP_UNPACK_CANONICAL(D,2,X); \ + } while (0) + +#define FP_PACK_D(val,X) \ + do { \ + _FP_PACK_CANONICAL(D,2,X); \ + _FP_PACK_RAW_2(D,val,X); \ + } while (0) + +#define FP_NEG_D(R,X) _FP_NEG(D,2,R,X) +#define FP_ADD_D(R,X,Y) _FP_ADD(D,2,R,X,Y) +#define FP_SUB_D(R,X,Y) _FP_SUB(D,2,R,X,Y) +#define FP_MUL_D(R,X,Y) _FP_MUL(D,2,R,X,Y) +#define FP_DIV_D(R,X,Y) _FP_DIV(D,2,R,X,Y) +#define FP_SQRT_D(R,X) _FP_SQRT(D,2,R,X) + +#define FP_CMP_D(r,X,Y,un) _FP_CMP(D,2,r,X,Y,un) +#define FP_CMP_EQ_D(r,X,Y) _FP_CMP_EQ(D,2,r,X,Y) + +#define FP_TO_INT_D(r,X,rsz,rsg) _FP_TO_INT(D,2,r,X,rsz,rsg) +#define FP_FROM_INT_D(X,r,rs,rt) _FP_FROM_INT(D,2,X,r,rs,rt) + +#else + +union _FP_UNION_D +{ + double flt; + struct { +#if __BYTE_ORDER == __BIG_ENDIAN + unsigned sign : 1; + unsigned exp : _FP_EXPBITS_D; + unsigned long frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0); +#else + unsigned long frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0); + unsigned exp : _FP_EXPBITS_D; + unsigned sign : 1; +#endif + } bits __attribute__((packed)); +}; + +#define FP_DECL_D(X) _FP_DECL(1,X) +#define FP_UNPACK_RAW_D(X,val) _FP_UNPACK_RAW_1(D,X,val) +#define FP_PACK_RAW_D(val,X) _FP_PACK_RAW_1(D,val,X) + +#define FP_UNPACK_D(X,val) \ + do { \ + _FP_UNPACK_RAW_1(D,X,val); \ + _FP_UNPACK_CANONICAL(D,1,X); \ + } while (0) + +#define FP_PACK_D(val,X) \ + do { \ + _FP_PACK_CANONICAL(D,1,X); \ + _FP_PACK_RAW_1(D,val,X); \ + } while (0) + +#define FP_NEG_D(R,X) _FP_NEG(D,1,R,X) +#define FP_ADD_D(R,X,Y) _FP_ADD(D,1,R,X,Y) +#define FP_SUB_D(R,X,Y) _FP_SUB(D,1,R,X,Y) +#define FP_MUL_D(R,X,Y) _FP_MUL(D,1,R,X,Y) +#define FP_DIV_D(R,X,Y) _FP_DIV(D,1,R,X,Y) +#define FP_SQRT_D(R,X) _FP_SQRT(D,1,R,X) + +/* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by + the target machine. */ + +#define FP_CMP_D(r,X,Y,un) _FP_CMP(D,1,r,X,Y,un) +#define FP_CMP_EQ_D(r,X,Y) _FP_CMP_EQ(D,1,r,X,Y) + +#define FP_TO_INT_D(r,X,rsz,rsg) _FP_TO_INT(D,1,r,X,rsz,rsg) +#define FP_FROM_INT_D(X,r,rs,rt) _FP_FROM_INT(D,1,X,r,rs,rt) + +#endif /* W_TYPE_SIZE < 64 */ diff --git a/arch/sparc64/math-emu/fabsq.c b/arch/sparc64/math-emu/fabsq.c new file mode 100644 index 000000000..e6aa497c8 --- /dev/null +++ b/arch/sparc64/math-emu/fabsq.c @@ -0,0 +1,19 @@ +#include "soft-fp.h" +#include "quad.h" + +int FABSQ(unsigned long *rd, unsigned long *rs2) +{ +/* + FP_DECL_Q(A); FP_DECL_Q(R); + + __FP_UNPACK_Q(A, rs2); + _FP_FRAC_COPY_2(R, A); + R_c = A_c; + R_e = A_e; + R_s = 0; + __FP_PACK_Q(rd, R); + */ + rd[0] = rs2[0] & 0x7fffffffffffffffUL; + rd[1] = rs2[1]; + return 1; +} diff --git a/arch/sparc64/math-emu/faddd.c b/arch/sparc64/math-emu/faddd.c new file mode 100644 index 000000000..69b9f7c24 --- /dev/null +++ b/arch/sparc64/math-emu/faddd.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "double.h" + +int FADDD(void *rd, void *rs2, void *rs1) +{ + FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(R); + + __FP_UNPACK_D(A, rs1); + __FP_UNPACK_D(B, rs2); + FP_ADD_D(R, A, B); + __FP_PACK_D(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/faddq.c b/arch/sparc64/math-emu/faddq.c new file mode 100644 index 000000000..07245eb95 --- /dev/null +++ b/arch/sparc64/math-emu/faddq.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "quad.h" + +int FADDQ(void *rd, void *rs2, void *rs1) +{ + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(R); + + __FP_UNPACK_Q(A, rs1); + __FP_UNPACK_Q(B, rs2); + FP_ADD_Q(R, A, B); + __FP_PACK_Q(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fadds.c b/arch/sparc64/math-emu/fadds.c new file mode 100644 index 000000000..71295ae47 --- /dev/null +++ b/arch/sparc64/math-emu/fadds.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "single.h" + +int FADDS(void *rd, void *rs2, void *rs1) +{ + FP_DECL_S(A); FP_DECL_S(B); FP_DECL_S(R); + + __FP_UNPACK_S(A, rs1); + __FP_UNPACK_S(B, rs2); + FP_ADD_S(R, A, B); + __FP_PACK_S(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fcmpeq.c b/arch/sparc64/math-emu/fcmpeq.c new file mode 100644 index 000000000..cb37bc0db --- /dev/null +++ b/arch/sparc64/math-emu/fcmpeq.c @@ -0,0 +1,28 @@ +#include "soft-fp.h" +#include "quad.h" + +int FCMPEQ(void *rd, void *rs2, void *rs1) +{ + FP_DECL_Q(A); FP_DECL_Q(B); + long ret; + int fccno = ((long)rd) & 3; + unsigned long fsr; + + rd = (void *)(((long)rd)&~3); + __FP_UNPACK_Q(A, rs1); + __FP_UNPACK_Q(B, rs2); + FP_CMP_Q(ret, A, B, 3); + switch (ret) { + case 1: ret = 2; break; + case -1: ret = 1; break; + } + fsr = *(unsigned long *)rd; + switch (fccno) { + case 0: fsr &= ~0xc00; fsr |= (ret << 10); break; + case 1: fsr &= ~0x300000000UL; fsr |= (ret << 32); break; + case 2: fsr &= ~0xc00000000UL; fsr |= (ret << 34); break; + case 3: fsr &= ~0x3000000000UL; fsr |= (ret << 36); break; + } + *(unsigned long *)rd = fsr; + return 1; +} diff --git a/arch/sparc64/math-emu/fcmpq.c b/arch/sparc64/math-emu/fcmpq.c new file mode 100644 index 000000000..81dadf47a --- /dev/null +++ b/arch/sparc64/math-emu/fcmpq.c @@ -0,0 +1,28 @@ +#include "soft-fp.h" +#include "quad.h" + +int FCMPQ(void *rd, void *rs2, void *rs1) +{ + FP_DECL_Q(A); FP_DECL_Q(B); + long ret; + int fccno = ((long)rd) & 3; + unsigned long fsr; + + rd = (void *)(((long)rd)&~3); + __FP_UNPACK_Q(A, rs1); + __FP_UNPACK_Q(B, rs2); + FP_CMP_Q(ret, A, B, 3); + switch (ret) { + case 1: ret = 2; break; + case -1: ret = 1; break; + } + fsr = *(unsigned long *)rd; + switch (fccno) { + case 0: fsr &= ~0xc00; fsr |= (ret << 10); break; + case 1: fsr &= ~0x300000000UL; fsr |= (ret << 32); break; + case 2: fsr &= ~0xc00000000UL; fsr |= (ret << 34); break; + case 3: fsr &= ~0x3000000000UL; fsr |= (ret << 36); break; + } + *(unsigned long *)rd = fsr; + return 1; +} diff --git a/arch/sparc64/math-emu/fdivd.c b/arch/sparc64/math-emu/fdivd.c new file mode 100644 index 000000000..2984290fc --- /dev/null +++ b/arch/sparc64/math-emu/fdivd.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "double.h" + +int FDIVD(void *rd, void *rs2, void *rs1) +{ + FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(R); + + __FP_UNPACK_D(A, rs1); + __FP_UNPACK_D(B, rs2); + FP_DIV_D(R, A, B); + __FP_PACK_D(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fdivq.c b/arch/sparc64/math-emu/fdivq.c new file mode 100644 index 000000000..e5858c0af --- /dev/null +++ b/arch/sparc64/math-emu/fdivq.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "quad.h" + +int FDIVQ(void *rd, void *rs2, void *rs1) +{ + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(R); + + __FP_UNPACK_Q(A, rs1); + __FP_UNPACK_Q(B, rs2); + FP_DIV_Q(R, A, B); + __FP_PACK_Q(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fdivs.c b/arch/sparc64/math-emu/fdivs.c new file mode 100644 index 000000000..704f218c9 --- /dev/null +++ b/arch/sparc64/math-emu/fdivs.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "single.h" + +int FDIVS(void *rd, void *rs2, void *rs1) +{ + FP_DECL_S(A); FP_DECL_S(B); FP_DECL_S(R); + + __FP_UNPACK_S(A, rs1); + __FP_UNPACK_S(B, rs2); + FP_DIV_S(R, A, B); + __FP_PACK_S(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fdmulq.c b/arch/sparc64/math-emu/fdmulq.c new file mode 100644 index 000000000..7862a0039 --- /dev/null +++ b/arch/sparc64/math-emu/fdmulq.c @@ -0,0 +1,16 @@ +#include "soft-fp.h" +#include "quad.h" +#include "double.h" + +int FDMULQ(void *rd, void *rs2, void *rs1) +{ + FP_DECL_D(IN); FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(R); + + __FP_UNPACK_D(IN, rs1); + FP_CONV(Q,D,2,1,A,IN); + __FP_UNPACK_D(IN, rs2); + FP_CONV(Q,D,2,1,B,IN); + FP_MUL_Q(R, A, B); + __FP_PACK_Q(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fdtoi.c b/arch/sparc64/math-emu/fdtoi.c new file mode 100644 index 000000000..f7ba0f1f2 --- /dev/null +++ b/arch/sparc64/math-emu/fdtoi.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "double.h" + +int FDTOI(unsigned *rd, void *rs2) +{ + FP_DECL_D(A); + unsigned r; + + __FP_UNPACK_D(A, rs2); + FP_TO_INT_D(r, A, 32, 1); + *rd = r; + return 1; +} diff --git a/arch/sparc64/math-emu/fdtoq.c b/arch/sparc64/math-emu/fdtoq.c new file mode 100644 index 000000000..42e4009c6 --- /dev/null +++ b/arch/sparc64/math-emu/fdtoq.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "quad.h" +#include "double.h" + +int FDTOQ(void *rd, void *rs2) +{ + FP_DECL_D(A); FP_DECL_Q(R); + + __FP_UNPACK_D(A, rs2); + FP_CONV(Q,D,2,1,R,A); + __FP_PACK_Q(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fdtos.c b/arch/sparc64/math-emu/fdtos.c new file mode 100644 index 000000000..fb7fede54 --- /dev/null +++ b/arch/sparc64/math-emu/fdtos.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int FDTOS(void *rd, void *rs2) +{ + FP_DECL_D(A); FP_DECL_S(R); + + __FP_UNPACK_D(A, rs2); + FP_CONV(S,D,1,1,R,A); + __FP_PACK_S(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fdtox.c b/arch/sparc64/math-emu/fdtox.c new file mode 100644 index 000000000..1a93b585c --- /dev/null +++ b/arch/sparc64/math-emu/fdtox.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "double.h" + +int FDTOX(unsigned long *rd, void *rs2) +{ + FP_DECL_D(A); + unsigned long r; + + __FP_UNPACK_D(A, rs2); + FP_TO_INT_D(r, A, 64, 1); + *rd = r; + return 1; +} diff --git a/arch/sparc64/math-emu/fitoq.c b/arch/sparc64/math-emu/fitoq.c new file mode 100644 index 000000000..5d08bbe18 --- /dev/null +++ b/arch/sparc64/math-emu/fitoq.c @@ -0,0 +1,12 @@ +#include "soft-fp.h" +#include "quad.h" + +int FITOQ(void *rd, void *rs2) +{ + FP_DECL_Q(R); + int a = *(int *)rs2; + + FP_FROM_INT_Q(R, a, 32, int); + __FP_PACK_Q(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fmovq.c b/arch/sparc64/math-emu/fmovq.c new file mode 100644 index 000000000..9294a339e --- /dev/null +++ b/arch/sparc64/math-emu/fmovq.c @@ -0,0 +1,6 @@ +int FMOVQ(unsigned long *rd, unsigned long *rs2) +{ + rd[0] = rs2[0]; + rd[1] = rs2[1]; + return 0; +} diff --git a/arch/sparc64/math-emu/fmuld.c b/arch/sparc64/math-emu/fmuld.c new file mode 100644 index 000000000..04d1f38df --- /dev/null +++ b/arch/sparc64/math-emu/fmuld.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "double.h" + +int FMULD(void *rd, void *rs2, void *rs1) +{ + FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(R); + + __FP_UNPACK_D(A, rs1); + __FP_UNPACK_D(B, rs2); + FP_MUL_D(R, A, B); + __FP_PACK_D(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fmulq.c b/arch/sparc64/math-emu/fmulq.c new file mode 100644 index 000000000..4d099be6f --- /dev/null +++ b/arch/sparc64/math-emu/fmulq.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "quad.h" + +int FMULQ(void *rd, void *rs2, void *rs1) +{ + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(R); + + __FP_UNPACK_Q(A, rs1); + __FP_UNPACK_Q(B, rs2); + FP_MUL_Q(R, A, B); + __FP_PACK_Q(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fmuls.c b/arch/sparc64/math-emu/fmuls.c new file mode 100644 index 000000000..8a358d030 --- /dev/null +++ b/arch/sparc64/math-emu/fmuls.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "single.h" + +int FMULS(void *rd, void *rs2, void *rs1) +{ + FP_DECL_S(A); FP_DECL_S(B); FP_DECL_S(R); + + __FP_UNPACK_S(A, rs1); + __FP_UNPACK_S(B, rs2); + FP_MUL_S(R, A, B); + __FP_PACK_S(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fnegq.c b/arch/sparc64/math-emu/fnegq.c new file mode 100644 index 000000000..dcdea3202 --- /dev/null +++ b/arch/sparc64/math-emu/fnegq.c @@ -0,0 +1,18 @@ +#include "soft-fp.h" +#include "quad.h" + +int FNEGQ(unsigned long *rd, unsigned long *rs2) +{ +/* + FP_DECL_Q(A); FP_DECL_Q(R); + + __FP_UNPACK_Q(A, rs2); + FP_NEG_Q(R, A); + __FP_PACK_Q(rd, R); + */ + rd[0] = rs2[0] ^ 0x8000000000000000UL; + rd[1] = rs2[1]; + return 1; +} + + diff --git a/arch/sparc64/math-emu/fqtod.c b/arch/sparc64/math-emu/fqtod.c new file mode 100644 index 000000000..1f9161999 --- /dev/null +++ b/arch/sparc64/math-emu/fqtod.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "quad.h" +#include "double.h" + +int FQTOD(void *rd, void *rs2) +{ + FP_DECL_Q(A); FP_DECL_D(R); + + __FP_UNPACK_Q(A, rs2); + FP_CONV(D,Q,1,2,R,A); + __FP_PACK_D(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fqtoi.c b/arch/sparc64/math-emu/fqtoi.c new file mode 100644 index 000000000..06d67ff81 --- /dev/null +++ b/arch/sparc64/math-emu/fqtoi.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "quad.h" + +int FQTOI(unsigned *rd, void *rs2) +{ + FP_DECL_Q(A); + unsigned r; + + __FP_UNPACK_Q(A, rs2); + FP_TO_INT_Q(r, A, 32, 1); + *rd = r; + return 1; +} diff --git a/arch/sparc64/math-emu/fqtos.c b/arch/sparc64/math-emu/fqtos.c new file mode 100644 index 000000000..833f10618 --- /dev/null +++ b/arch/sparc64/math-emu/fqtos.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "quad.h" +#include "single.h" + +int FQTOS(void *rd, void *rs2) +{ + FP_DECL_Q(A); FP_DECL_S(R); + + __FP_UNPACK_Q(A, rs2); + FP_CONV(S,Q,1,2,R,A); + __FP_PACK_S(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fqtox.c b/arch/sparc64/math-emu/fqtox.c new file mode 100644 index 000000000..6cdabc8cd --- /dev/null +++ b/arch/sparc64/math-emu/fqtox.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "quad.h" + +int FQTOX(unsigned long *rd, void *rs2) +{ + FP_DECL_Q(A); + unsigned long r; + + __FP_UNPACK_Q(A, rs2); + FP_TO_INT_Q(r, A, 64, 1); + *rd = r; + return 1; +} diff --git a/arch/sparc64/math-emu/fsmuld.c b/arch/sparc64/math-emu/fsmuld.c new file mode 100644 index 000000000..1a0eefd06 --- /dev/null +++ b/arch/sparc64/math-emu/fsmuld.c @@ -0,0 +1,16 @@ +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int FSMULD(void *rd, void *rs2, void *rs1) +{ + FP_DECL_S(IN); FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(R); + + __FP_UNPACK_S(IN, rs1); + FP_CONV(D,S,1,1,A,IN); + __FP_UNPACK_S(IN, rs2); + FP_CONV(D,S,1,1,B,IN); + FP_MUL_D(R, A, B); + __FP_PACK_D(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fsqrtd.c b/arch/sparc64/math-emu/fsqrtd.c new file mode 100644 index 000000000..59f5e3bf3 --- /dev/null +++ b/arch/sparc64/math-emu/fsqrtd.c @@ -0,0 +1,12 @@ +#include "soft-fp.h" +#include "double.h" + +int FSQRTD(void *rd, void *rs2) +{ + FP_DECL_D(A); FP_DECL_D(R); + + __FP_UNPACK_D(A, rs2); + FP_SQRT_D(R, A); + __FP_PACK_D(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fsqrtq.c b/arch/sparc64/math-emu/fsqrtq.c new file mode 100644 index 000000000..8f84aa850 --- /dev/null +++ b/arch/sparc64/math-emu/fsqrtq.c @@ -0,0 +1,12 @@ +#include "soft-fp.h" +#include "quad.h" + +int FSQRTQ(void *rd, void *rs2) +{ + FP_DECL_Q(A); FP_DECL_Q(R); + + __FP_UNPACK_Q(A, rs2); + FP_SQRT_Q(R, A); + __FP_PACK_Q(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fsqrts.c b/arch/sparc64/math-emu/fsqrts.c new file mode 100644 index 000000000..d57cdc98e --- /dev/null +++ b/arch/sparc64/math-emu/fsqrts.c @@ -0,0 +1,12 @@ +#include "soft-fp.h" +#include "single.h" + +int FSQRTS(void *rd, void *rs2) +{ + FP_DECL_S(A); FP_DECL_S(R); + + __FP_UNPACK_S(A, rs2); + FP_SQRT_S(R, A); + __FP_PACK_S(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fstod.c b/arch/sparc64/math-emu/fstod.c new file mode 100644 index 000000000..60f1bc8a4 --- /dev/null +++ b/arch/sparc64/math-emu/fstod.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int FSTOD(void *rd, void *rs2) +{ + FP_DECL_S(A); FP_DECL_D(R); + + __FP_UNPACK_S(A, rs2); + FP_CONV(D,S,1,1,R,A); + __FP_PACK_D(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fstoi.c b/arch/sparc64/math-emu/fstoi.c new file mode 100644 index 000000000..4adc7d170 --- /dev/null +++ b/arch/sparc64/math-emu/fstoi.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "single.h" + +int FSTOI(unsigned *rd, void *rs2) +{ + FP_DECL_S(A); + unsigned r; + + __FP_UNPACK_S(A, rs2); + FP_TO_INT_S(r, A, 32, 1); + *rd = r; + return 1; +} diff --git a/arch/sparc64/math-emu/fstoq.c b/arch/sparc64/math-emu/fstoq.c new file mode 100644 index 000000000..06313a77f --- /dev/null +++ b/arch/sparc64/math-emu/fstoq.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "quad.h" +#include "single.h" + +int FSTOQ(void *rd, void *rs2) +{ + FP_DECL_S(A); FP_DECL_Q(R); + + __FP_UNPACK_S(A, rs2); + FP_CONV(Q,S,2,1,R,A); + __FP_PACK_Q(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fstox.c b/arch/sparc64/math-emu/fstox.c new file mode 100644 index 000000000..555fba2ee --- /dev/null +++ b/arch/sparc64/math-emu/fstox.c @@ -0,0 +1,13 @@ +#include "soft-fp.h" +#include "single.h" + +int FSTOX(unsigned long *rd, void *rs2) +{ + FP_DECL_S(A); + unsigned long r; + + __FP_UNPACK_S(A, rs2); + FP_TO_INT_S(r, A, 64, 1); + *rd = r; + return 1; +} diff --git a/arch/sparc64/math-emu/fsubd.c b/arch/sparc64/math-emu/fsubd.c new file mode 100644 index 000000000..b0f451b32 --- /dev/null +++ b/arch/sparc64/math-emu/fsubd.c @@ -0,0 +1,15 @@ +#include "soft-fp.h" +#include "double.h" + +int FSUBD(void *rd, void *rs2, void *rs1) +{ + FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(R); + + __FP_UNPACK_D(A, rs1); + __FP_UNPACK_D(B, rs2); + if (B_c != FP_CLS_NAN) + B_s ^= 1; + FP_ADD_D(R, A, B); + __FP_PACK_D(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fsubq.c b/arch/sparc64/math-emu/fsubq.c new file mode 100644 index 000000000..ef006a540 --- /dev/null +++ b/arch/sparc64/math-emu/fsubq.c @@ -0,0 +1,15 @@ +#include "soft-fp.h" +#include "quad.h" + +int FSUBQ(void *rd, void *rs2, void *rs1) +{ + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(R); + + __FP_UNPACK_Q(A, rs1); + __FP_UNPACK_Q(B, rs2); + if (B_c != FP_CLS_NAN) + B_s ^= 1; + FP_ADD_Q(R, A, B); + __FP_PACK_Q(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fsubs.c b/arch/sparc64/math-emu/fsubs.c new file mode 100644 index 000000000..d060f095a --- /dev/null +++ b/arch/sparc64/math-emu/fsubs.c @@ -0,0 +1,15 @@ +#include "soft-fp.h" +#include "single.h" + +int FSUBS(void *rd, void *rs2, void *rs1) +{ + FP_DECL_S(A); FP_DECL_S(B); FP_DECL_S(R); + + __FP_UNPACK_S(A, rs1); + __FP_UNPACK_S(B, rs2); + if (B_c != FP_CLS_NAN) + B_s ^= 1; + FP_ADD_S(R, A, B); + __FP_PACK_S(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/fxtoq.c b/arch/sparc64/math-emu/fxtoq.c new file mode 100644 index 000000000..7c2f7df48 --- /dev/null +++ b/arch/sparc64/math-emu/fxtoq.c @@ -0,0 +1,12 @@ +#include "soft-fp.h" +#include "quad.h" + +int FXTOQ(void *rd, void *rs2) +{ + FP_DECL_Q(R); + long a = *(long *)rs2; + + FP_FROM_INT_Q(R, a, 64, long); + __FP_PACK_Q(rd, R); + return 1; +} diff --git a/arch/sparc64/math-emu/math.c b/arch/sparc64/math-emu/math.c new file mode 100644 index 000000000..58ed21062 --- /dev/null +++ b/arch/sparc64/math-emu/math.c @@ -0,0 +1,209 @@ +/* $Id: math.c,v 1.3 1997/10/15 07:28:55 jj Exp $ + * arch/sparc64/math-emu/math.c + * + * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * + * Emulation routines originate from soft-fp package, which is part + * of glibc and has appropriate copyrights in it. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/sched.h> + +#include <asm/fpumacro.h> +#include <asm/ptrace.h> +#include <asm/uaccess.h> + +#define FLOATFUNC(x) extern int x(void *,void *,void *); + +FLOATFUNC(FMOVQ) +FLOATFUNC(FNEGQ) +FLOATFUNC(FABSQ) +FLOATFUNC(FSQRTQ) +FLOATFUNC(FADDQ) +FLOATFUNC(FSUBQ) +FLOATFUNC(FMULQ) +FLOATFUNC(FDIVQ) +FLOATFUNC(FDMULQ) +FLOATFUNC(FQTOX) +FLOATFUNC(FXTOQ) +FLOATFUNC(FQTOS) +FLOATFUNC(FQTOD) +FLOATFUNC(FITOQ) +FLOATFUNC(FSTOQ) +FLOATFUNC(FDTOQ) +FLOATFUNC(FQTOI) +FLOATFUNC(FCMPQ) +FLOATFUNC(FCMPEQ) + +FLOATFUNC(FSQRTS) +FLOATFUNC(FSQRTD) +FLOATFUNC(FADDS) +FLOATFUNC(FADDD) +FLOATFUNC(FSUBS) +FLOATFUNC(FSUBD) +FLOATFUNC(FMULS) +FLOATFUNC(FMULD) +FLOATFUNC(FDIVS) +FLOATFUNC(FDIVD) +FLOATFUNC(FSMULD) +FLOATFUNC(FSTOX) +FLOATFUNC(FDTOX) +FLOATFUNC(FDTOS) +FLOATFUNC(FSTOD) +FLOATFUNC(FSTOI) +FLOATFUNC(FDTOI) + +int do_mathemu(struct pt_regs *regs, struct fpustate *f) +{ + unsigned long pc = regs->tpc; + unsigned long tstate = regs->tstate; + u32 insn = 0; + int type = 0; /* 01 is single, 10 is double, 11 is quad, + 000011 is rs1, 001100 is rs2, 110000 is rd (00 in rd is fcc) + 111100000000 tells which ftt may that happen in */ + int freg; + static u64 zero[2] = { 0L, 0L }; + int flags; + int (*func)(void *,void *,void *) = NULL; + + if(tstate & TSTATE_PRIV) + die_if_kernel("FPQuad from kernel", regs); + MOD_INC_USE_COUNT; + if(current->tss.flags & SPARC_FLAG_32BIT) + pc = (u32)pc; + if (get_user(insn, (u32 *)pc) != -EFAULT) { + if ((insn & 0xc1f80000) == 0x81a00000) /* FPOP1 */ { + switch ((insn >> 5) & 0x1ff) { + /* QUAD - ftt == 3 */ + case 0x003: type = 0x33c; func = FMOVQ; break; + case 0x007: type = 0x33c; func = FNEGQ; break; + case 0x00b: type = 0x33c; func = FABSQ; break; + case 0x02b: type = 0x33c; func = FSQRTQ; break; + case 0x043: type = 0x33f; func = FADDQ; break; + case 0x047: type = 0x33f; func = FSUBQ; break; + case 0x04b: type = 0x33f; func = FMULQ; break; + case 0x04f: type = 0x33f; func = FDIVQ; break; + case 0x06e: type = 0x33a; func = FDMULQ; break; + case 0x083: type = 0x32c; func = FQTOX; break; + case 0x08c: type = 0x338; func = FXTOQ; break; + case 0x0c7: type = 0x31c; func = FQTOS; break; + case 0x0cb: type = 0x32c; func = FQTOD; break; + case 0x0cc: type = 0x334; func = FITOQ; break; + case 0x0cd: type = 0x334; func = FSTOQ; break; + case 0x0ce: type = 0x338; func = FDTOQ; break; + case 0x0d3: type = 0x31c; func = FQTOI; break; + /* SUBNORMAL - ftt == 2 */ + case 0x029: type = 0x214; func = FSQRTS; break; + case 0x02a: type = 0x228; func = FSQRTD; break; + case 0x041: type = 0x215; func = FADDS; break; + case 0x042: type = 0x22a; func = FADDD; break; + case 0x045: type = 0x215; func = FSUBS; break; + case 0x046: type = 0x22a; func = FSUBD; break; + case 0x049: type = 0x215; func = FMULS; break; + case 0x04a: type = 0x22a; func = FMULD; break; + case 0x04d: type = 0x215; func = FDIVS; break; + case 0x04e: type = 0x22a; func = FDIVD; break; + case 0x069: type = 0x225; func = FSMULD; break; + case 0x081: type = 0x224; func = FSTOX; break; + case 0x082: type = 0x228; func = FDTOX; break; + case 0x0c6: type = 0x218; func = FDTOS; break; + case 0x0c9: type = 0x224; func = FSTOD; break; + case 0x0d1: type = 0x214; func = FSTOI; break; + case 0x0d2: type = 0x218; func = FDTOI; break; + } + } + else if ((insn & 0xc1f80000) == 0x81a80000) /* FPOP2 */ { + switch ((insn >> 5) & 0x1ff) { + case 0x053: type = 0x30f; func = FCMPQ; break; + case 0x057: type = 0x30f; func = FCMPEQ; break; + } + } + } + if (type) { + void *rs1 = NULL, *rs2 = NULL, *rd = NULL; + + freg = (f->fsr >> 14) & 0xf; + if (freg != (type >> 8)) + goto err; + f->fsr &= ~0x1c000; + freg = ((insn >> 14) & 0x1f); + switch (type & 0x3) { + case 3: if (freg & 2) { + f->fsr |= (6 << 14) /* invalid_fp_register */; + goto err; + } + case 2: freg = ((freg & 1) << 5) | (freg & 0x1e); + case 1: rs1 = (void *)&f->regs[freg]; + flags = (freg < 32) ? SPARC_FLAG_USEDFPUL : SPARC_FLAG_USEDFPUU; + if (!(current->tss.flags & flags)) + rs1 = (void *)&zero; + break; + } + freg = (insn & 0x1f); + switch ((type >> 2) & 0x3) { + case 3: if (freg & 2) { + f->fsr |= (6 << 14) /* invalid_fp_register */; + goto err; + } + case 2: freg = ((freg & 1) << 5) | (freg & 0x1e); + case 1: rs2 = (void *)&f->regs[freg]; + flags = (freg < 32) ? SPARC_FLAG_USEDFPUL : SPARC_FLAG_USEDFPUU; + if (!(current->tss.flags & flags)) + rs2 = (void *)&zero; + break; + } + freg = ((insn >> 25) & 0x1f); + switch ((type >> 4) & 0x3) { + case 0: rd = (void *)(((long)&f->fsr) | (freg & 3)); break; + case 3: if (freg & 2) { + f->fsr |= (6 << 14) /* invalid_fp_register */; + goto err; + } + case 2: freg = ((freg & 1) << 5) | (freg & 0x1e); + case 1: rd = (void *)&f->regs[freg]; + flags = (freg < 32) ? SPARC_FLAG_USEDFPUL : SPARC_FLAG_USEDFPUU; + regs->fprs |= FPRS_FEF; + if (!(current->tss.flags & SPARC_FLAG_USEDFPU)) { + current->tss.flags |= SPARC_FLAG_USEDFPU; + f->fsr = 0; + f->gsr = 0; + } + if (!(current->tss.flags & flags)) { + if (freg < 32) + memset(f->regs, 0, 32*sizeof(u32)); + else + memset(f->regs+32, 0, 32*sizeof(u32)); + } + current->tss.flags |= flags; + break; + } + func(rd, rs2, rs1); + regs->tpc = regs->tnpc; + regs->tnpc += 4; + MOD_DEC_USE_COUNT; + return 1; + } +err: MOD_DEC_USE_COUNT; + return 0; +} + +#ifdef MODULE + +MODULE_AUTHOR("Jakub Jelinek (jj@sunsite.mff.cuni.cz), Richard Henderson (rth@cygnus.com)"); +MODULE_DESCRIPTION("FPU emulation module"); + +extern int (*handle_mathemu)(struct pt_regs *, struct fpustate *); + +int init_module(void) +{ + handle_mathemu = do_mathemu; + return 0; +} + +void cleanup_module(void) +{ + handle_mathemu = NULL; +} +#endif diff --git a/arch/sparc64/math-emu/op-1.h b/arch/sparc64/math-emu/op-1.h new file mode 100644 index 000000000..87960c287 --- /dev/null +++ b/arch/sparc64/math-emu/op-1.h @@ -0,0 +1,245 @@ +/* + * Basic one-word fraction declaration and manipulation. + */ + +#define _FP_FRAC_DECL_1(X) _FP_W_TYPE X##_f +#define _FP_FRAC_COPY_1(D,S) (D##_f = S##_f) +#define _FP_FRAC_SET_1(X,I) (X##_f = I) +#define _FP_FRAC_HIGH_1(X) (X##_f) +#define _FP_FRAC_LOW_1(X) (X##_f) +#define _FP_FRAC_WORD_1(X,w) (X##_f) + +#define _FP_FRAC_ADDI_1(X,I) (X##_f += I) +#define _FP_FRAC_SLL_1(X,N) \ + do { \ + if (__builtin_constant_p(N) && (N) == 1) \ + X##_f += X##_f; \ + else \ + X##_f <<= (N); \ + } while (0) +#define _FP_FRAC_SRL_1(X,N) (X##_f >>= N) + +/* Right shift with sticky-lsb. */ +#define _FP_FRAC_SRS_1(X,N,sz) __FP_FRAC_SRS_1(X##_f, N, sz) + +#define __FP_FRAC_SRS_1(X,N,sz) \ + (X = (X >> (N) | (__builtin_constant_p(N) && (N) == 1 \ + ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0))) + +#define _FP_FRAC_ADD_1(R,X,Y) (R##_f = X##_f + Y##_f) +#define _FP_FRAC_SUB_1(R,X,Y) (R##_f = X##_f - Y##_f) +#define _FP_FRAC_CLZ_1(z, X) __FP_CLZ(z, X##_f) + +/* Predicates */ +#define _FP_FRAC_NEGP_1(X) ((_FP_WS_TYPE)X##_f < 0) +#define _FP_FRAC_ZEROP_1(X) (X##_f == 0) +#define _FP_FRAC_OVERP_1(fs,X) (X##_f & _FP_OVERFLOW_##fs) +#define _FP_FRAC_EQ_1(X, Y) (X##_f == Y##_f) +#define _FP_FRAC_GE_1(X, Y) (X##_f >= Y##_f) +#define _FP_FRAC_GT_1(X, Y) (X##_f > Y##_f) + +#define _FP_ZEROFRAC_1 0 +#define _FP_MINFRAC_1 1 + +/* + * Unpack the raw bits of a native fp value. Do not classify or + * normalize the data. + */ + +#define _FP_UNPACK_RAW_1(fs, X, val) \ + do { \ + union _FP_UNION_##fs _flo; _flo.flt = (val); \ + \ + X##_f = _flo.bits.frac; \ + X##_e = _flo.bits.exp; \ + X##_s = _flo.bits.sign; \ + } while (0) + + +/* + * Repack the raw bits of a native fp value. + */ + +#define _FP_PACK_RAW_1(fs, val, X) \ + do { \ + union _FP_UNION_##fs _flo; \ + \ + _flo.bits.frac = X##_f; \ + _flo.bits.exp = X##_e; \ + _flo.bits.sign = X##_s; \ + \ + (val) = _flo.flt; \ + } while (0) + + +/* + * Multiplication algorithms: + */ + +/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the + multiplication immediately. */ + +#define _FP_MUL_MEAT_1_imm(fs, R, X, Y) \ + do { \ + R##_f = X##_f * Y##_f; \ + /* Normalize since we know where the msb of the multiplicands \ + were (bit B), we know that the msb of the of the product is \ + at either 2B or 2B-1. */ \ + _FP_FRAC_SRS_1(R, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \ + } while (0) + +/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ + +#define _FP_MUL_MEAT_1_wide(fs, R, X, Y, doit) \ + do { \ + _FP_W_TYPE _Z_f0, _Z_f1; \ + doit(_Z_f1, _Z_f0, X##_f, Y##_f); \ + /* Normalize since we know where the msb of the multiplicands \ + were (bit B), we know that the msb of the of the product is \ + at either 2B or 2B-1. */ \ + _FP_FRAC_SRS_2(_Z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \ + R##_f = _Z_f0; \ + } while (0) + +/* Finally, a simple widening multiply algorithm. What fun! */ + +#define _FP_MUL_MEAT_1_hard(fs, R, X, Y) \ + do { \ + _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1; \ + \ + /* split the words in half */ \ + _xh = X##_f >> (_FP_W_TYPE_SIZE/2); \ + _xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \ + _yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \ + _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \ + \ + /* multiply the pieces */ \ + _z_f0 = _xl * _yl; \ + _a_f0 = _xh * _yl; \ + _a_f1 = _xl * _yh; \ + _z_f1 = _xh * _yh; \ + \ + /* reassemble into two full words */ \ + if ((_a_f0 += _a_f1) < _a_f1) \ + _z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2); \ + _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2); \ + _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2); \ + _FP_FRAC_ADD_2(_z, _z, _a); \ + \ + /* normalize */ \ + _FP_FRAC_SRS_2(_z, _FP_WFRACBITS_##fs - 1, 2*_FP_WFRACBITS_##fs); \ + R##_f = _z_f0; \ + } while (0) + + +/* + * Division algorithms: + */ + +/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the + division immediately. Give this macro either _FP_DIV_HELP_imm for + C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you + choose will depend on what the compiler does with divrem4. */ + +#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \ + do { \ + _FP_W_TYPE _q, _r; \ + X##_f <<= (X##_f < Y##_f \ + ? R##_e--, _FP_WFRACBITS_##fs \ + : _FP_WFRACBITS_##fs - 1); \ + doit(_q, _r, X##_f, Y##_f); \ + R##_f = _q | (_r != 0); \ + } while (0) + +/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd + that may be useful in this situation. This first is for a primitive + that requires normalization, the second for one that does not. Look + for UDIV_NEEDS_NORMALIZATION to tell which your machine needs. */ + +#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \ + do { \ + _FP_W_TYPE _nh, _nl, _q, _r; \ + \ + /* Normalize Y -- i.e. make the most significant bit set. */ \ + Y##_f <<= _FP_WFRACXBITS_##fs - 1; \ + \ + /* Shift X op correspondingly high, that is, up one full word. */ \ + if (X##_f <= Y##_f) \ + { \ + _nl = 0; \ + _nh = X##_f; \ + } \ + else \ + { \ + R##_e++; \ + _nl = X##_f << (_FP_W_TYPE_SIZE-1); \ + _nh = X##_f >> 1; \ + } \ + \ + udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \ + R##_f = _q | (_r != 0); \ + } while (0) + +#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \ + do { \ + _FP_W_TYPE _nh, _nl, _q, _r; \ + if (X##_f < Y##_f) \ + { \ + R##_e--; \ + _nl = X##_f << _FP_WFRACBITS_##fs; \ + _nh = X##_f >> _FP_WFRACXBITS_##fs; \ + } \ + else \ + { \ + _nl = X##_f << (_FP_WFRACBITS_##fs - 1); \ + _nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \ + } \ + udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \ + R##_f = _q | (_r != 0); \ + } while (0) + + +/* + * Square root algorithms: + * We have just one right now, maybe Newton approximation + * should be added for those machines where division is fast. + */ + +#define _FP_SQRT_MEAT_1(R, S, T, X, q) \ + do { \ + while (q) \ + { \ + T##_f = S##_f + q; \ + if (T##_f <= X##_f) \ + { \ + S##_f = T##_f + q; \ + X##_f -= T##_f; \ + R##_f += q; \ + } \ + _FP_FRAC_SLL_1(X, 1); \ + q >>= 1; \ + } \ + } while (0) + +/* + * Assembly/disassembly for converting to/from integral types. + * No shifting or overflow handled here. + */ + +#define _FP_FRAC_ASSEMBLE_1(r, X, rsize) (r = X##_f) +#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize) (X##_f = r) + + +/* + * Convert FP values between word sizes + */ + +#define _FP_FRAC_CONV_1_1(dfs, sfs, D, S) \ + do { \ + D##_f = S##_f; \ + if (_FP_WFRACBITS_##sfs > _FP_WFRACBITS_##dfs) \ + _FP_FRAC_SRS_1(D, (_FP_WFRACBITS_##sfs-_FP_WFRACBITS_##dfs), \ + _FP_WFRACBITS_##sfs); \ + else \ + D##_f <<= _FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs; \ + } while (0) diff --git a/arch/sparc64/math-emu/op-2.h b/arch/sparc64/math-emu/op-2.h new file mode 100644 index 000000000..879b6004f --- /dev/null +++ b/arch/sparc64/math-emu/op-2.h @@ -0,0 +1,408 @@ +/* + * Basic two-word fraction declaration and manipulation. + */ + +#define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1 +#define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1) +#define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I) +#define _FP_FRAC_HIGH_2(X) (X##_f1) +#define _FP_FRAC_LOW_2(X) (X##_f0) +#define _FP_FRAC_WORD_2(X,w) (X##_f##w) + +#define _FP_FRAC_SLL_2(X,N) \ + do { \ + if ((N) < _FP_W_TYPE_SIZE) \ + { \ + if (__builtin_constant_p(N) && (N) == 1) \ + { \ + X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \ + X##_f0 += X##_f0; \ + } \ + else \ + { \ + X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \ + X##_f0 <<= (N); \ + } \ + } \ + else \ + { \ + X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \ + X##_f0 = 0; \ + } \ + } while (0) + +#define _FP_FRAC_SRL_2(X,N) \ + do { \ + if ((N) < _FP_W_TYPE_SIZE) \ + { \ + X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \ + X##_f1 >>= (N); \ + } \ + else \ + { \ + X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \ + X##_f1 = 0; \ + } \ + } while (0) + +/* Right shift with sticky-lsb. */ +#define _FP_FRAC_SRS_2(X,N,sz) \ + do { \ + if ((N) < _FP_W_TYPE_SIZE) \ + { \ + X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \ + (__builtin_constant_p(N) && (N) == 1 \ + ? X##_f0 & 1 \ + : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \ + X##_f1 >>= (N); \ + } \ + else \ + { \ + X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \ + (((X##_f1 << (sz - (N))) | X##_f0) != 0)); \ + X##_f1 = 0; \ + } \ + } while (0) + +#define _FP_FRAC_ADDI_2(X,I) \ + __FP_FRAC_ADDI_2(X##_f1, X##_f0, I) + +#define _FP_FRAC_ADD_2(R,X,Y) \ + __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) + +#define _FP_FRAC_SUB_2(R,X,Y) \ + __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) + +#define _FP_FRAC_CLZ_2(R,X) \ + do { \ + if (X##_f1) \ + __FP_CLZ(R,X##_f1); \ + else \ + { \ + __FP_CLZ(R,X##_f0); \ + R += _FP_W_TYPE_SIZE; \ + } \ + } while(0) + +/* Predicates */ +#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0) +#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0) +#define _FP_FRAC_OVERP_2(fs,X) (X##_f1 & _FP_OVERFLOW_##fs) +#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0) +#define _FP_FRAC_GT_2(X, Y) \ + (X##_f1 > Y##_f1 || X##_f1 == Y##_f1 && X##_f0 > Y##_f0) +#define _FP_FRAC_GE_2(X, Y) \ + (X##_f1 > Y##_f1 || X##_f1 == Y##_f1 && X##_f0 >= Y##_f0) + +#define _FP_ZEROFRAC_2 0, 0 +#define _FP_MINFRAC_2 0, 1 + +/* + * Internals + */ + +#define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1) + +#define __FP_CLZ_2(R, xh, xl) \ + do { \ + if (xh) \ + __FP_CLZ(R,xl); \ + else \ + { \ + __FP_CLZ(R,xl); \ + R += _FP_W_TYPE_SIZE; \ + } \ + } while(0) + +#if 0 + +#ifndef __FP_FRAC_ADDI_2 +#define __FP_FRAC_ADDI_2(xh, xl, i) \ + (xh += ((xl += i) < i)) +#endif +#ifndef __FP_FRAC_ADD_2 +#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \ + (rh = xh + yh + ((rl = xl + yl) < xl)) +#endif +#ifndef __FP_FRAC_SUB_2 +#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \ + (rh = xh - yh - ((rl = xl - yl) > xl)) +#endif + +#else + +#undef __FP_FRAC_ADDI_2 +#define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i) +#undef __FP_FRAC_ADD_2 +#define __FP_FRAC_ADD_2 add_ssaaaa +#undef __FP_FRAC_SUB_2 +#define __FP_FRAC_SUB_2 sub_ddmmss + +#endif + +/* + * Unpack the raw bits of a native fp value. Do not classify or + * normalize the data. + */ + +#define _FP_UNPACK_RAW_2(fs, X, val) \ + do { \ + union _FP_UNION_##fs _flo; _flo.flt = (val); \ + \ + X##_f0 = _flo.bits.frac0; \ + X##_f1 = _flo.bits.frac1; \ + X##_e = _flo.bits.exp; \ + X##_s = _flo.bits.sign; \ + } while (0) + + +/* + * Repack the raw bits of a native fp value. + */ + +#define _FP_PACK_RAW_2(fs, val, X) \ + do { \ + union _FP_UNION_##fs _flo; \ + \ + _flo.bits.frac0 = X##_f0; \ + _flo.bits.frac1 = X##_f1; \ + _flo.bits.exp = X##_e; \ + _flo.bits.sign = X##_s; \ + \ + (val) = _flo.flt; \ + } while (0) + + +/* + * Multiplication algorithms: + */ + +/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ + +#define _FP_MUL_MEAT_2_wide(fs, R, X, Y, doit) \ + do { \ + _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ + \ + doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ + doit(_b_f1, _b_f0, X##_f0, Y##_f1); \ + doit(_c_f1, _c_f0, X##_f1, Y##_f0); \ + doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \ + \ + __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ + _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \ + _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ + _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \ + 0, _b_f1, _b_f0, 0); \ + __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ + _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \ + _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ + _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \ + 0, _c_f1, _c_f0, 0); \ + \ + /* Normalize since we know where the msb of the multiplicands \ + were (bit B), we know that the msb of the of the product is \ + at either 2B or 2B-1. */ \ + _FP_FRAC_SRS_4(_z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \ + R##_f0 = _FP_FRAC_WORD_4(_z,0); \ + R##_f1 = _FP_FRAC_WORD_4(_z,1); \ + } while (0) + +#define _FP_MUL_MEAT_2_gmp(fs, R, X, Y) \ + do { \ + _FP_W_TYPE _x[2], _y[2], _z[4]; \ + _x[0] = X##_f0; _x[1] = X##_f1; \ + _y[0] = Y##_f0; _y[1] = Y##_f1; \ + \ + mpn_mul_n(_z, _x, _y, 2); \ + \ + /* Normalize since we know where the msb of the multiplicands \ + were (bit B), we know that the msb of the of the product is \ + at either 2B or 2B-1. */ \ + _FP_FRAC_SRS_4(_z, _FP_WFRACBITS##_fs-1, 2*_FP_WFRACBITS_##fs); \ + R##_f0 = _z[0]; \ + R##_f1 = _z[1]; \ + } while (0) + + +/* + * Division algorithms: + */ + +#define _FP_DIV_MEAT_2_udiv_64(fs, R, X, Y) \ + do { \ + extern void _fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2], \ + _FP_W_TYPE n1, _FP_W_TYPE n0, \ + _FP_W_TYPE d1, _FP_W_TYPE d0); \ + _FP_W_TYPE _n_f3, _n_f2, _n_f1, _n_f0, _r_f1, _r_f0; \ + _FP_W_TYPE _q_f1, _q_f0, _m_f1, _m_f0; \ + _FP_W_TYPE _rmem[2], _qmem[2]; \ + \ + if (_FP_FRAC_GT_2(X, Y)) \ + { \ + R##_e++; \ + _n_f3 = X##_f1 >> 1; \ + _n_f2 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ + _n_f1 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ + _n_f0 = 0; \ + } \ + else \ + { \ + _n_f3 = X##_f1; \ + _n_f2 = X##_f0; \ + _n_f1 = _n_f0 = 0; \ + } \ + \ + /* Normalize, i.e. make the most significant bit of the \ + denominator set. */ \ + _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs - 1); \ + \ + /* Do the 256/128 bit division given the 128-bit _fp_udivmodtf4 \ + primitive snagged from libgcc2.c. */ \ + \ + _fp_udivmodti4(_qmem, _rmem, _n_f3, _n_f2, 0, Y##_f1); \ + _q_f1 = _qmem[0]; \ + umul_ppmm(_m_f1, _m_f0, _q_f1, Y##_f0); \ + _r_f1 = _rmem[0]; \ + _r_f0 = _n_f1; \ + if (_FP_FRAC_GT_2(_m, _r)) \ + { \ + _q_f1--; \ + _FP_FRAC_ADD_2(_r, _r, Y); \ + if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ + { \ + _q_f1--; \ + _FP_FRAC_ADD_2(_r, _r, Y); \ + } \ + } \ + _FP_FRAC_SUB_2(_r, _r, _m); \ + \ + _fp_udivmodti4(_qmem, _rmem, _r_f1, _r_f0, 0, Y##_f1); \ + _q_f0 = _qmem[0]; \ + umul_ppmm(_m_f1, _m_f0, _q_f0, Y##_f0); \ + _r_f1 = _rmem[0]; \ + _r_f0 = _n_f0; \ + if (_FP_FRAC_GT_2(_m, _r)) \ + { \ + _q_f0--; \ + _FP_FRAC_ADD_2(_r, _r, Y); \ + if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ + { \ + _q_f0--; \ + _FP_FRAC_ADD_2(_r, _r, Y); \ + } \ + } \ + _FP_FRAC_SUB_2(_r, _r, _m); \ + \ + R##_f1 = _q_f1; \ + R##_f0 = _q_f0 | ((_r_f1 | _r_f0) != 0); \ + } while (0) + + +#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \ + do { \ + _FP_W_TYPE _x[4], _y[2], _z[4]; \ + _y[0] = Y##_f0; _y[1] = Y##_f1; \ + _x[0] = _x[3] = 0; \ + if (_FP_FRAC_GT_2(X, Y)) \ + { \ + R##_e++; \ + _x[1] = (X##_f0 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE) | \ + X##_f1 >> (_FP_W_TYPE_SIZE - \ + (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE))); \ + _x[2] = X##_f1 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE); \ + } \ + else \ + { \ + _x[1] = (X##_f0 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE) | \ + X##_f1 >> (_FP_W_TYPE_SIZE - \ + (_FP_WFRACBITS - _FP_W_TYPE_SIZE))); \ + _x[2] = X##_f1 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE); \ + } \ + \ + (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \ + R##_f1 = _z[1]; \ + R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \ + } while (0) + + +/* + * Square root algorithms: + * We have just one right now, maybe Newton approximation + * should be added for those machines where division is fast. + */ + +#define _FP_SQRT_MEAT_2(R, S, T, X, q) \ + do { \ + while (q) \ + { \ + T##_f1 = S##_f1 + q; \ + if (T##_f1 <= X##_f1) \ + { \ + S##_f1 = T##_f1 + q; \ + X##_f1 -= T##_f1; \ + R##_f1 += q; \ + } \ + _FP_FRAC_SLL_2(X, 1); \ + q >>= 1; \ + } \ + q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ + while (q) \ + { \ + T##_f0 = S##_f0 + q; \ + T##_f1 = S##_f1; \ + if (T##_f1 < X##_f1 || \ + (T##_f1 == X##_f1 && T##_f0 < X##_f0)) \ + { \ + S##_f0 = T##_f0 + q; \ + if (((_FP_WS_TYPE)T##_f0) < 0 && \ + ((_FP_WS_TYPE)S##_f0) >= 0) \ + S##_f1++; \ + _FP_FRAC_SUB_2(X, X, T); \ + R##_f0 += q; \ + } \ + _FP_FRAC_SLL_2(X, 1); \ + q >>= 1; \ + } \ + } while (0) + + +/* + * Assembly/disassembly for converting to/from integral types. + * No shifting or overflow handled here. + */ + +#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \ + do { \ + if (rsize <= _FP_W_TYPE_SIZE) \ + r = X##_f0; \ + else \ + { \ + r = X##_f1; \ + r <<= _FP_W_TYPE_SIZE; \ + r += X##_f0; \ + } \ + } while (0) + +#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \ + do { \ + X##_f0 = r; \ + X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ + } while (0) + +/* + * Convert FP values between word sizes + */ + +#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \ + do { \ + _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ + _FP_WFRACBITS_##sfs); \ + D##_f = S##_f0; \ + } while (0) + +#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \ + do { \ + D##_f0 = S##_f; \ + D##_f1 = 0; \ + _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ + } while (0) diff --git a/arch/sparc64/math-emu/op-4.h b/arch/sparc64/math-emu/op-4.h new file mode 100644 index 000000000..2f917a847 --- /dev/null +++ b/arch/sparc64/math-emu/op-4.h @@ -0,0 +1,78 @@ +/* + * Basic four-word fraction declaration and manipulation. + */ + +#define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4] +#define _FP_FRAC_COPY_4(D,S) \ + (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \ + D##_f[2] = S##_f[2], D##_f[3] = S##_f[3]) +#define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I) +#define _FP_FRAC_HIGH_4(X) (X##_f[3]) +#define _FP_FRAC_LOW_4(X) (X##_f[0]) +#define _FP_FRAC_WORD_4(X,w) (X##_f[w]) + +#define _FP_FRAC_SLL_4(X,N) \ + do { \ + _FP_I_TYPE _up, _down, _skip, _i; \ + _skip = (N) / _FP_W_TYPE_SIZE; \ + _up = (N) % _FP_W_TYPE_SIZE; \ + _down = _FP_W_TYPE_SIZE - _up; \ + for (_i = 3; _i > _skip; --_i) \ + X##_f[_i] = X##_f[_i-_skip] << _up | X##_f[_i-_skip-1] >> _down; \ + X##_f[_i] <<= _up; \ + for (--_i; _i >= 0; --_i) \ + X##_f[_i] = 0; \ + } while (0) + +#define _FP_FRAC_SRL_4(X,N) \ + do { \ + _FP_I_TYPE _up, _down, _skip, _i; \ + _skip = (N) / _FP_W_TYPE_SIZE; \ + _down = (N) % _FP_W_TYPE_SIZE; \ + _up = _FP_W_TYPE_SIZE - _down; \ + for (_i = 0; _i < 4-_skip; ++_i) \ + X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \ + X##_f[_i] >>= _down; \ + for (++_i; _i < 4; ++_i) \ + X##_f[_i] = 0; \ + } while (0) + + +/* Right shift with sticky-lsb. */ +#define _FP_FRAC_SRS_4(X,N,size) \ + do { \ + _FP_I_TYPE _up, _down, _skip, _i; \ + _FP_W_TYPE _s; \ + _skip = (N) / _FP_W_TYPE_SIZE; \ + _down = (N) % _FP_W_TYPE_SIZE; \ + _up = _FP_W_TYPE_SIZE - _down; \ + for (_s = _i = 0; _i < _skip; ++_i) \ + _s |= X##_f[_i]; \ + _s = X##_f[_i] << _up; \ + X##_f[0] = X##_f[_skip] >> _down | X##_f[_skip+1] << _up | (_s != 0); \ + for (_i = 1; _i < 4-_skip; ++_i) \ + X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \ + X##_f[_i] >>= _down; \ + for (++_i; _i < 4; ++_i) \ + X##_f[_i] = 0; \ + } while (0) + +#define _FP_FRAC_ADD_4(R,X,Y) \ + __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ + X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ + Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) + +/* + * Internals + */ + +#define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \ + (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0) + +#ifndef __FP_FRAC_ADD_4 +#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ + (r0 = x0 + y0, \ + r1 = x1 + y1 + (r0 < x0), \ + r2 = x2 + y2 + (r1 < x1), \ + r3 = x3 + y3 + (r2 < x2)) +#endif diff --git a/arch/sparc64/math-emu/op-common.h b/arch/sparc64/math-emu/op-common.h new file mode 100644 index 000000000..8123e4c46 --- /dev/null +++ b/arch/sparc64/math-emu/op-common.h @@ -0,0 +1,628 @@ +#define _FP_DECL(wc, X) \ + _FP_I_TYPE X##_c, X##_s, X##_e; \ + _FP_FRAC_DECL_##wc(X) + +/* + * Finish truely unpacking a native fp value by classifying the kind + * of fp value and normalizing both the exponent and the fraction. + */ + +#define _FP_UNPACK_CANONICAL(fs, wc, X) \ +do { \ + switch (X##_e) \ + { \ + default: \ + _FP_FRAC_HIGH_##wc(X) |= _FP_IMPLBIT_##fs; \ + _FP_FRAC_SLL_##wc(X, _FP_WORKBITS); \ + X##_e -= _FP_EXPBIAS_##fs; \ + X##_c = FP_CLS_NORMAL; \ + break; \ + \ + case 0: \ + if (_FP_FRAC_ZEROP_##wc(X)) \ + X##_c = FP_CLS_ZERO; \ + else \ + { \ + /* a denormalized number */ \ + _FP_I_TYPE _shift; \ + _FP_FRAC_CLZ_##wc(_shift, X); \ + _shift -= _FP_FRACXBITS_##fs; \ + _FP_FRAC_SLL_##wc(X, (_shift+_FP_WORKBITS)); \ + X##_e -= _FP_EXPBIAS_##fs - 1 + _shift; \ + X##_c = FP_CLS_NORMAL; \ + } \ + break; \ + \ + case _FP_EXPMAX_##fs: \ + if (_FP_FRAC_ZEROP_##wc(X)) \ + X##_c = FP_CLS_INF; \ + else \ + /* we don't differentiate between signaling and quiet nans */ \ + X##_c = FP_CLS_NAN; \ + break; \ + } \ +} while (0) + + +/* + * Before packing the bits back into the native fp result, take care + * of such mundane things as rounding and overflow. Also, for some + * kinds of fp values, the original parts may not have been fully + * extracted -- but that is ok, we can regenerate them now. + */ + +#define _FP_PACK_CANONICAL(fs, wc, X) \ +do { \ + switch (X##_c) \ + { \ + case FP_CLS_NORMAL: \ + X##_e += _FP_EXPBIAS_##fs; \ + if (X##_e > 0) \ + { \ + _FP_ROUND(wc, X); \ + if (_FP_FRAC_OVERP_##wc(fs, X)) \ + { \ + _FP_FRAC_SRL_##wc(X, (_FP_WORKBITS+1)); \ + X##_e++; \ + } \ + else \ + _FP_FRAC_SRL_##wc(X, _FP_WORKBITS); \ + if (X##_e >= _FP_EXPMAX_##fs) \ + { \ + /* overflow to infinity */ \ + X##_e = _FP_EXPMAX_##fs; \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + } \ + } \ + else \ + { \ + /* we've got a denormalized number */ \ + X##_e = -X##_e + 1; \ + if (X##_e <= _FP_WFRACBITS_##fs) \ + { \ + _FP_FRAC_SRS_##wc(X, X##_e, _FP_WFRACBITS_##fs); \ + _FP_ROUND(wc, X); \ + X##_e = _FP_FRAC_OVERP_##wc(fs, X); \ + _FP_FRAC_SRL_##wc(X, _FP_WORKBITS); \ + } \ + else \ + { \ + /* underflow to zero */ \ + X##_e = 0; \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + } \ + } \ + break; \ + \ + case FP_CLS_ZERO: \ + X##_e = 0; \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + break; \ + \ + case FP_CLS_INF: \ + X##_e = _FP_EXPMAX_##fs; \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + break; \ + \ + case FP_CLS_NAN: \ + X##_e = _FP_EXPMAX_##fs; \ + if (!_FP_KEEPNANFRACP) \ + { \ + _FP_FRAC_SET_##wc(X, _FP_NANFRAC_##fs); \ + X##_s = 0; \ + } \ + else \ + _FP_FRAC_HIGH_##wc(X) |= _FP_QNANBIT_##fs; \ + break; \ + } \ +} while (0) + + +/* + * Main addition routine. The input values should be cooked. + */ + +#define _FP_ADD(fs, wc, R, X, Y) \ +do { \ + switch (_FP_CLS_COMBINE(X##_c, Y##_c)) \ + { \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL): \ + { \ + /* shift the smaller number so that its exponent matches the larger */ \ + _FP_I_TYPE diff = X##_e - Y##_e; \ + \ + if (diff < 0) \ + { \ + diff = -diff; \ + if (diff <= _FP_WFRACBITS_##fs) \ + _FP_FRAC_SRS_##wc(X, diff, _FP_WFRACBITS_##fs); \ + else if (!_FP_FRAC_ZEROP_##wc(X)) \ + _FP_FRAC_SET_##wc(X, _FP_MINFRAC_##wc); \ + else \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + R##_e = Y##_e; \ + } \ + else \ + { \ + if (diff > 0) \ + { \ + if (diff <= _FP_WFRACBITS_##fs) \ + _FP_FRAC_SRS_##wc(Y, diff, _FP_WFRACBITS_##fs); \ + else if (!_FP_FRAC_ZEROP_##wc(Y)) \ + _FP_FRAC_SET_##wc(Y, _FP_MINFRAC_##wc); \ + else \ + _FP_FRAC_SET_##wc(Y, _FP_ZEROFRAC_##wc); \ + } \ + R##_e = X##_e; \ + } \ + \ + R##_c = FP_CLS_NORMAL; \ + \ + if (X##_s == Y##_s) \ + { \ + R##_s = X##_s; \ + _FP_FRAC_ADD_##wc(R, X, Y); \ + if (_FP_FRAC_OVERP_##wc(fs, R)) \ + { \ + _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs); \ + R##_e++; \ + } \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_SUB_##wc(R, X, Y); \ + if (_FP_FRAC_ZEROP_##wc(R)) \ + { \ + /* return an exact zero */ \ + if (FP_ROUNDMODE == FP_RND_MINF) \ + R##_s |= Y##_s; \ + else \ + R##_s &= Y##_s; \ + R##_c = FP_CLS_ZERO; \ + } \ + else \ + { \ + if (_FP_FRAC_NEGP_##wc(R)) \ + { \ + _FP_FRAC_SUB_##wc(R, Y, X); \ + R##_s = Y##_s; \ + } \ + \ + /* renormalize after subtraction */ \ + _FP_FRAC_CLZ_##wc(diff, R); \ + diff -= _FP_WFRACXBITS_##fs; \ + if (diff) \ + { \ + R##_e -= diff; \ + _FP_FRAC_SLL_##wc(R, diff); \ + } \ + } \ + } \ + break; \ + } \ + \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN): \ + _FP_CHOOSENAN(fs, wc, R, X, Y); \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO): \ + R##_e = X##_e; \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO): \ + _FP_FRAC_COPY_##wc(R, X); \ + R##_s = X##_s; \ + R##_c = X##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL): \ + R##_e = Y##_e; \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN): \ + _FP_FRAC_COPY_##wc(R, Y); \ + R##_s = Y##_s; \ + R##_c = Y##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF): \ + if (X##_s != Y##_s) \ + { \ + /* +INF + -INF => NAN */ \ + _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs); \ + R##_s = X##_s ^ Y##_s; \ + R##_c = FP_CLS_NAN; \ + break; \ + } \ + /* FALLTHRU */ \ + \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO): \ + R##_s = X##_s; \ + R##_c = FP_CLS_INF; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF): \ + R##_s = Y##_s; \ + R##_c = FP_CLS_INF; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO): \ + /* make sure the sign is correct */ \ + if (FP_ROUNDMODE == FP_RND_MINF) \ + R##_s = X##_s | Y##_s; \ + else \ + R##_s = X##_s & Y##_s; \ + R##_c = FP_CLS_ZERO; \ + break; \ + \ + default: \ + abort(); \ + } \ +} while (0) + + +/* + * Main negation routine. FIXME -- when we care about setting exception + * bits reliably, this will not do. We should examine all of the fp classes. + */ + +#define _FP_NEG(fs, wc, R, X) \ + do { \ + _FP_FRAC_COPY_##wc(R, X); \ + R##_c = X##_c; \ + R##_e = X##_e; \ + R##_s = 1 ^ X##_s; \ + } while (0) + + +/* + * Main multiplication routine. The input values should be cooked. + */ + +#define _FP_MUL(fs, wc, R, X, Y) \ +do { \ + R##_s = X##_s ^ Y##_s; \ + switch (_FP_CLS_COMBINE(X##_c, Y##_c)) \ + { \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL): \ + R##_c = FP_CLS_NORMAL; \ + R##_e = X##_e + Y##_e + 1; \ + \ + _FP_MUL_MEAT_##fs(R,X,Y); \ + \ + if (_FP_FRAC_OVERP_##wc(fs, R)) \ + _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs); \ + else \ + R##_e--; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN): \ + _FP_CHOOSENAN(fs, wc, R, X, Y); \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO): \ + R##_s = X##_s; \ + \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO): \ + _FP_FRAC_COPY_##wc(R, X); \ + R##_c = X##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN): \ + R##_s = Y##_s; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO): \ + _FP_FRAC_COPY_##wc(R, Y); \ + R##_c = Y##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF): \ + R##_c = FP_CLS_NAN; \ + _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs); \ + break; \ + \ + default: \ + abort(); \ + } \ +} while (0) + + +/* + * Main division routine. The input values should be cooked. + */ + +#define _FP_DIV(fs, wc, R, X, Y) \ +do { \ + R##_s = X##_s ^ Y##_s; \ + switch (_FP_CLS_COMBINE(X##_c, Y##_c)) \ + { \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL): \ + R##_c = FP_CLS_NORMAL; \ + R##_e = X##_e - Y##_e; \ + \ + _FP_DIV_MEAT_##fs(R,X,Y); \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN): \ + _FP_CHOOSENAN(fs, wc, R, X, Y); \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO): \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R, X); \ + R##_c = X##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN): \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R, Y); \ + R##_c = Y##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL): \ + R##_c = FP_CLS_ZERO; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL): \ + R##_c = FP_CLS_INF; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO): \ + R##_c = FP_CLS_NAN; \ + _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs); \ + break; \ + \ + default: \ + abort(); \ + } \ +} while (0) + + +/* + * Main differential comparison routine. The inputs should be raw not + * cooked. The return is -1,0,1 for normal values, 2 otherwise. + */ + +#define _FP_CMP(fs, wc, ret, X, Y, un) \ + do { \ + /* NANs are unordered */ \ + if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X)) \ + || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y))) \ + { \ + ret = un; \ + } \ + else \ + { \ + /* Force -0 -> +0 */ \ + if (!X##_e && _FP_FRAC_ZEROP_##wc(X)) X##_s = 0; \ + if (!Y##_e && _FP_FRAC_ZEROP_##wc(Y)) X##_s = 0; \ + \ + if (X##_s != Y##_s) \ + ret = X##_s ? -1 : 1; \ + else if (X##_e > Y##_e) \ + ret = X##_s ? -1 : 1; \ + else if (X##_e < Y##_e) \ + ret = X##_s ? 1 : -1; \ + else if (_FP_FRAC_GT_##wc(X, Y)) \ + ret = X##_s ? -1 : 1; \ + else if (_FP_FRAC_GT_##wc(Y, X)) \ + ret = X##_s ? 1 : -1; \ + else \ + ret = 0; \ + } \ + } while (0) + + +/* Simplification for strict equality. */ + +#define _FP_CMP_EQ(fs, wc, ret, X, Y) \ + do { \ + /* NANs are unordered */ \ + if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X)) \ + || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y))) \ + { \ + ret = 1; \ + } \ + else \ + { \ + ret = !(X##_e == Y##_e \ + && _FP_FRAC_EQ_##wc(X, Y) \ + && (X##_s == Y##_s || !X##_e && _FP_FRAC_ZEROP_##wc(X))); \ + } \ + } while (0) + +/* + * Main square root routine. The input value should be cooked. + */ + +#define _FP_SQRT(fs, wc, R, X) \ +do { \ + _FP_FRAC_DECL_##wc(T); _FP_FRAC_DECL_##wc(S); \ + _FP_W_TYPE q; \ + switch (X##_c) \ + { \ + case FP_CLS_NAN: \ + R##_s = 0; \ + R##_c = FP_CLS_NAN; \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + break; \ + case FP_CLS_INF: \ + if (X##_s) \ + { \ + R##_s = 0; \ + R##_c = FP_CLS_NAN; /* sNAN */ \ + } \ + else \ + { \ + R##_s = 0; \ + R##_c = FP_CLS_INF; /* sqrt(+inf) = +inf */ \ + } \ + break; \ + case FP_CLS_ZERO: \ + R##_s = X##_s; \ + R##_c = FP_CLS_ZERO; /* sqrt(+-0) = +-0 */ \ + break; \ + case FP_CLS_NORMAL: \ + R##_s = 0; \ + if (X##_s) \ + { \ + R##_c = FP_CLS_NAN; /* sNAN */ \ + break; \ + } \ + R##_c = FP_CLS_NORMAL; \ + if (X##_e & 1) \ + _FP_FRAC_SLL_##wc(X, 1); \ + R##_e = X##_e >> 1; \ + _FP_FRAC_SET_##wc(S, _FP_ZEROFRAC_##wc); \ + _FP_FRAC_SET_##wc(R, _FP_ZEROFRAC_##wc); \ + q = _FP_OVERFLOW_##fs; \ + _FP_FRAC_SLL_##wc(X, 1); \ + _FP_SQRT_MEAT_##wc(R, S, T, X, q); \ + _FP_FRAC_SRL_##wc(R, 1); \ + } \ + } while (0) + +/* + * Convert from FP to integer + */ + +#define _FP_TO_INT(fs, wc, r, X, rsize, rsigned) \ + do { \ + switch (X##_c) \ + { \ + case FP_CLS_NORMAL: \ + if (X##_e < 0) \ + { \ + case FP_CLS_NAN: \ + case FP_CLS_ZERO: \ + r = 0; \ + } \ + else if (X##_e >= rsize - (rsigned != 0)) \ + { \ + case FP_CLS_INF: \ + if (rsigned) \ + { \ + r = 1; \ + r <<= rsize - 1; \ + r -= 1 - X##_s; \ + } \ + else \ + { \ + r = 0; \ + if (!X##_s) \ + r = ~r; \ + } \ + } \ + else \ + { \ + if (_FP_W_TYPE_SIZE*wc < rsize) \ + { \ + _FP_FRAC_ASSEMBLE_##wc(r, X, rsize); \ + r <<= X##_e - _FP_WFRACBITS_##fs; \ + } \ + else \ + { \ + if (X##_e >= _FP_WFRACBITS_##fs) \ + _FP_FRAC_SLL_##wc(X, (X##_e - _FP_WFRACBITS_##fs + 1)); \ + else \ + _FP_FRAC_SRL_##wc(X, (_FP_WFRACBITS_##fs - X##_e - 1)); \ + _FP_FRAC_ASSEMBLE_##wc(r, X, rsize); \ + } \ + if (rsigned && X##_s) \ + r = -r; \ + } \ + break; \ + } \ + } while (0) + +#define _FP_FROM_INT(fs, wc, X, r, rsize, rtype) \ + do { \ + if (r) \ + { \ + X##_c = FP_CLS_NORMAL; \ + \ + if ((X##_s = (r < 0))) \ + r = -r; \ + /* Note that `r' is now considered unsigned, so we don't have \ + to worry about the single signed overflow case. */ \ + \ + if (rsize <= _FP_W_TYPE_SIZE) \ + __FP_CLZ(X##_e, r); \ + else \ + __FP_CLZ_2(X##_e, (_FP_W_TYPE)(r >> _FP_W_TYPE_SIZE), \ + (_FP_W_TYPE)r); \ + if (rsize < _FP_W_TYPE_SIZE) \ + X##_e -= (_FP_W_TYPE_SIZE - rsize); \ + X##_e = rsize - X##_e - 1; \ + \ + if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs < X##_e) \ + __FP_FRAC_SRS_1(r, (X##_e - _FP_WFRACBITS_##fs), rsize); \ + r &= ~((_FP_W_TYPE)1 << X##_e); \ + _FP_FRAC_DISASSEMBLE_##wc(X, ((unsigned rtype)r), rsize); \ + _FP_FRAC_SLL_##wc(X, (_FP_WFRACBITS_##fs - X##_e - 1)); \ + } \ + else \ + { \ + X##_c = FP_CLS_ZERO, X##_s = 0; \ + } \ + } while (0) + + +#define FP_CONV(dfs,sfs,dwc,swc,D,S) \ + do { \ + _FP_FRAC_CONV_##dwc##_##swc(dfs, sfs, D, S); \ + D##_e = S##_e; \ + D##_c = S##_c; \ + D##_s = S##_s; \ + } while (0) + +/* + * Helper primitives. + */ + +/* Count leading zeros in a word. */ + +#ifndef __FP_CLZ +#define __FP_CLZ(r, x) \ + do { \ + _FP_W_TYPE _t = (x); \ + r = _FP_W_TYPE_SIZE - 1; \ + if (_t > 0xffffffff) r -= 32; \ + if (_t > 0xffffffff) _t >>= 32; \ + if (_t > 0xffff) r -= 16; \ + if (_t > 0xffff) _t >>= 16; \ + if (_t > 0xff) r -= 8; \ + if (_t > 0xff) _t >>= 8; \ + if (_t & 0xf0) r -= 4; \ + if (_t & 0xf0) _t >>= 4; \ + if (_t & 0xc) r -= 2; \ + if (_t & 0xc) _t >>= 2; \ + if (_t & 0x2) r -= 1; \ + } while (0) +#endif + +#define _FP_DIV_HELP_imm(q, r, n, d) \ + do { \ + q = n / d, r = n % d; \ + } while (0) diff --git a/arch/sparc64/math-emu/quad.h b/arch/sparc64/math-emu/quad.h new file mode 100644 index 000000000..dfc3b4eea --- /dev/null +++ b/arch/sparc64/math-emu/quad.h @@ -0,0 +1,71 @@ +/* + * Definitions for IEEE Quad Precision + */ + +#if _FP_W_TYPE_SIZE < 64 +#error "Only stud muffins allowed, schmuck." +#endif + +#define _FP_FRACTBITS_Q (2*_FP_W_TYPE_SIZE) + +#define _FP_FRACBITS_Q 113 +#define _FP_FRACXBITS_Q (_FP_FRACTBITS_Q - _FP_FRACBITS_Q) +#define _FP_WFRACBITS_Q (_FP_WORKBITS + _FP_FRACBITS_Q) +#define _FP_WFRACXBITS_Q (_FP_FRACTBITS_Q - _FP_WFRACBITS_Q) +#define _FP_EXPBITS_Q 15 +#define _FP_EXPBIAS_Q 16383 +#define _FP_EXPMAX_Q 32767 + +#define _FP_QNANBIT_Q \ + ((_FP_W_TYPE)1 << (_FP_FRACBITS_Q-2) % _FP_W_TYPE_SIZE) +#define _FP_IMPLBIT_Q \ + ((_FP_W_TYPE)1 << (_FP_FRACBITS_Q-1) % _FP_W_TYPE_SIZE) +#define _FP_OVERFLOW_Q \ + ((_FP_W_TYPE)1 << (_FP_WFRACBITS_Q % _FP_W_TYPE_SIZE)) + +union _FP_UNION_Q +{ + long double flt /* __attribute__((mode(TF))) */ ; + struct { +#if __BYTE_ORDER == __BIG_ENDIAN + unsigned sign : 1; + unsigned exp : _FP_EXPBITS_Q; + unsigned long frac1 : _FP_FRACBITS_Q-(_FP_IMPLBIT_Q != 0)-_FP_W_TYPE_SIZE; + unsigned long frac0 : _FP_W_TYPE_SIZE; +#else + unsigned long frac0 : _FP_W_TYPE_SIZE; + unsigned long frac1 : _FP_FRACBITS_Q-(_FP_IMPLBIT_Q != 0)-_FP_W_TYPE_SIZE; + unsigned exp : _FP_EXPBITS_Q; + unsigned sign : 1; +#endif + } bits; +}; + +#define FP_DECL_Q(X) _FP_DECL(2,X) +#define FP_UNPACK_RAW_Q(X,val) _FP_UNPACK_RAW_2(Q,X,val) +#define FP_PACK_RAW_Q(val,X) _FP_PACK_RAW_2(Q,val,X) + +#define FP_UNPACK_Q(X,val) \ + do { \ + _FP_UNPACK_RAW_2(Q,X,val); \ + _FP_UNPACK_CANONICAL(Q,2,X); \ + } while (0) + +#define FP_PACK_Q(val,X) \ + do { \ + _FP_PACK_CANONICAL(Q,2,X); \ + _FP_PACK_RAW_2(Q,val,X); \ + } while (0) + +#define FP_NEG_Q(R,X) _FP_NEG(Q,2,R,X) +#define FP_ADD_Q(R,X,Y) _FP_ADD(Q,2,R,X,Y) +#define FP_SUB_Q(R,X,Y) _FP_SUB(Q,2,R,X,Y) +#define FP_MUL_Q(R,X,Y) _FP_MUL(Q,2,R,X,Y) +#define FP_DIV_Q(R,X,Y) _FP_DIV(Q,2,R,X,Y) +#define FP_SQRT_Q(R,X) _FP_SQRT(Q,2,R,X) + +#define FP_CMP_Q(r,X,Y,un) _FP_CMP(Q,2,r,X,Y,un) +#define FP_CMP_EQ_Q(r,X,Y) _FP_CMP_EQ(Q,2,r,X,Y) + +#define FP_TO_INT_Q(r,X,rsz,rsg) _FP_TO_INT(Q,2,r,X,rsz,rsg) +#define FP_FROM_INT_Q(X,r,rs,rt) _FP_FROM_INT(Q,2,X,r,rs,rt) diff --git a/arch/sparc64/math-emu/sfp-machine.h b/arch/sparc64/math-emu/sfp-machine.h new file mode 100644 index 000000000..f15a5ea4a --- /dev/null +++ b/arch/sparc64/math-emu/sfp-machine.h @@ -0,0 +1,225 @@ +/* Machine-dependent software floating-point definitions. Sparc64 version. + Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_imm(S,R,X,Y) +#define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_1_wide(D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) _FP_MUL_MEAT_2_wide(Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv_64(Q,R,X,Y) + +#define _FP_NANFRAC_S _FP_QNANBIT_S +#define _FP_NANFRAC_D _FP_QNANBIT_D +#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0 + +#define _FP_KEEPNANFRACP 1 +#define _FP_CHOOSENAN(fs, wc, R, X, Y) \ + do { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define __FP_UNPACK_RAW_1(fs, X, val) \ + do { \ + union _FP_UNION_##fs *_flo = \ + (union _FP_UNION_##fs *)val; \ + \ + X##_f = _flo->bits.frac; \ + X##_e = _flo->bits.exp; \ + X##_s = _flo->bits.sign; \ + } while (0) + +#define __FP_PACK_RAW_1(fs, val, X) \ + do { \ + union _FP_UNION_##fs *_flo = \ + (union _FP_UNION_##fs *)val; \ + \ + _flo->bits.frac = X##_f; \ + _flo->bits.exp = X##_e; \ + _flo->bits.sign = X##_s; \ + } while (0) + +#define __FP_UNPACK_RAW_2(fs, X, val) \ + do { \ + union _FP_UNION_##fs *_flo = \ + (union _FP_UNION_##fs *)val; \ + \ + X##_f0 = _flo->bits.frac0; \ + X##_f1 = _flo->bits.frac1; \ + X##_e = _flo->bits.exp; \ + X##_s = _flo->bits.sign; \ + } while (0) + +#define __FP_PACK_RAW_2(fs, val, X) \ + do { \ + union _FP_UNION_##fs *_flo = \ + (union _FP_UNION_##fs *)val; \ + \ + _flo->bits.frac0 = X##_f0; \ + _flo->bits.frac1 = X##_f1; \ + _flo->bits.exp = X##_e; \ + _flo->bits.sign = X##_s; \ + } while (0) + +#define __FP_UNPACK_S(X,val) \ + do { \ + __FP_UNPACK_RAW_1(S,X,val); \ + _FP_UNPACK_CANONICAL(S,1,X); \ + } while (0) + +#define __FP_PACK_S(val,X) \ + do { \ + _FP_PACK_CANONICAL(S,1,X); \ + __FP_PACK_RAW_1(S,val,X); \ + } while (0) + +#define __FP_UNPACK_D(X,val) \ + do { \ + __FP_UNPACK_RAW_1(D,X,val); \ + _FP_UNPACK_CANONICAL(D,1,X); \ + } while (0) + +#define __FP_PACK_D(val,X) \ + do { \ + _FP_PACK_CANONICAL(D,1,X); \ + __FP_PACK_RAW_1(D,val,X); \ + } while (0) + +#define __FP_UNPACK_Q(X,val) \ + do { \ + __FP_UNPACK_RAW_2(Q,X,val); \ + _FP_UNPACK_CANONICAL(Q,2,X); \ + } while (0) + +#define __FP_PACK_Q(val,X) \ + do { \ + _FP_PACK_CANONICAL(Q,2,X); \ + __FP_PACK_RAW_2(Q,val,X); \ + } while (0) + +#include <linux/types.h> +#include <asm/byteorder.h> + +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addcc %4,%5,%1 + add %2,%3,%0 + bcs,a,pn %%xcc, 1f + add %0, 1, %0 + 1:" \ + : "=r" ((UDItype)(sh)), \ + "=&r" ((UDItype)(sl)) \ + : "r" ((UDItype)(ah)), \ + "r" ((UDItype)(bh)), \ + "r" ((UDItype)(al)), \ + "r" ((UDItype)(bl)) \ + : "cc") + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subcc %4,%5,%1 + sub %2,%3,%0 + bcs,a,pn %%xcc, 1f + sub %0, 1, %0 + 1:" \ + : "=r" ((UDItype)(sh)), \ + "=&r" ((UDItype)(sl)) \ + : "r" ((UDItype)(ah)), \ + "r" ((UDItype)(bh)), \ + "r" ((UDItype)(al)), \ + "r" ((UDItype)(bl)) \ + : "cc") + +#define umul_ppmm(wh, wl, u, v) \ + do { \ + long tmp1 = 0, tmp2 = 0, tmp3 = 0; \ + __asm__ ("mulx %2,%3,%1 + srlx %2,32,%4 + srl %3,0,%5 + mulx %4,%5,%6 + srlx %3,32,%4 + srl %2,0,%5 + mulx %4,%5,%5 + srlx %2,32,%4 + add %5,%6,%6 + srlx %3,32,%5 + mulx %4,%5,%4 + srlx %6,32,%5 + add %4,%5,%0" \ + : "=r" ((UDItype)(wh)), \ + "=&r" ((UDItype)(wl)) \ + : "r" ((UDItype)(u)), \ + "r" ((UDItype)(v)), \ + "r" ((UDItype)(tmp1)), \ + "r" ((UDItype)(tmp2)), \ + "r" ((UDItype)(tmp3)) \ + : "cc"); \ + } while (0) + +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ + __d1 = (d >> 32); \ + __d0 = (USItype)d; \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (UWtype) __q1 * __d0; \ + __r1 = (__r1 << 32) | (n0 >> 32); \ + if (__r1 < __m) \ + { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (UWtype) __q0 * __d0; \ + __r0 = (__r0 << 32) | ((USItype)n0); \ + if (__r0 < __m) \ + { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (UWtype) (__q1 << 32) | __q0; \ + (r) = __r0; \ + } while (0) + +#define UDIV_NEEDS_NORMALIZATION 1 + +#define abort() \ + return 0 + +#ifdef __BIG_ENDIAN +#define __BYTE_ORDER __BIG_ENDIAN +#else +#define __BYTE_ORDER __LITTLE_ENDIAN +#endif diff --git a/arch/sparc64/math-emu/single.h b/arch/sparc64/math-emu/single.h new file mode 100644 index 000000000..fa7f386cd --- /dev/null +++ b/arch/sparc64/math-emu/single.h @@ -0,0 +1,66 @@ +/* + * Definitions for IEEE Single Precision + */ + +#if _FP_W_TYPE_SIZE < 32 +#error "Here's a nickle kid. Go buy yourself a real computer." +#endif + +#define _FP_FRACBITS_S 24 +#define _FP_FRACXBITS_S (_FP_W_TYPE_SIZE - _FP_FRACBITS_S) +#define _FP_WFRACBITS_S (_FP_WORKBITS + _FP_FRACBITS_S) +#define _FP_WFRACXBITS_S (_FP_W_TYPE_SIZE - _FP_WFRACBITS_S) +#define _FP_EXPBITS_S 8 +#define _FP_EXPBIAS_S 127 +#define _FP_EXPMAX_S 255 +#define _FP_QNANBIT_S ((_FP_W_TYPE)1 << (_FP_FRACBITS_S-2)) +#define _FP_IMPLBIT_S ((_FP_W_TYPE)1 << (_FP_FRACBITS_S-1)) +#define _FP_OVERFLOW_S ((_FP_W_TYPE)1 << (_FP_WFRACBITS_S)) + +/* The implementation of _FP_MUL_MEAT_S and _FP_DIV_MEAT_S should be + chosen by the target machine. */ + +union _FP_UNION_S +{ + float flt; + struct { +#if __BYTE_ORDER == __BIG_ENDIAN + unsigned sign : 1; + unsigned exp : _FP_EXPBITS_S; + unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0); +#else + unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0); + unsigned exp : _FP_EXPBITS_S; + unsigned sign : 1; +#endif + } bits __attribute__((packed)); +}; + +#define FP_DECL_S(X) _FP_DECL(1,X) +#define FP_UNPACK_RAW_S(X,val) _FP_UNPACK_RAW_1(S,X,val) +#define FP_PACK_RAW_S(val,X) _FP_PACK_RAW_1(S,val,X) + +#define FP_UNPACK_S(X,val) \ + do { \ + _FP_UNPACK_RAW_1(S,X,val); \ + _FP_UNPACK_CANONICAL(S,1,X); \ + } while (0) + +#define FP_PACK_S(val,X) \ + do { \ + _FP_PACK_CANONICAL(S,1,X); \ + _FP_PACK_RAW_1(S,val,X); \ + } while (0) + +#define FP_NEG_S(R,X) _FP_NEG(S,1,R,X) +#define FP_ADD_S(R,X,Y) _FP_ADD(S,1,R,X,Y) +#define FP_SUB_S(R,X,Y) _FP_SUB(S,1,R,X,Y) +#define FP_MUL_S(R,X,Y) _FP_MUL(S,1,R,X,Y) +#define FP_DIV_S(R,X,Y) _FP_DIV(S,1,R,X,Y) +#define FP_SQRT_S(R,X) _FP_SQRT(S,1,R,X) + +#define FP_CMP_S(r,X,Y,un) _FP_CMP(S,1,r,X,Y,un) +#define FP_CMP_EQ_S(r,X,Y) _FP_CMP_EQ(S,1,r,X,Y) + +#define FP_TO_INT_S(r,X,rsz,rsg) _FP_TO_INT(S,1,r,X,rsz,rsg) +#define FP_FROM_INT_S(X,r,rs,rt) _FP_FROM_INT(S,1,X,r,rs,rt) diff --git a/arch/sparc64/math-emu/soft-fp.h b/arch/sparc64/math-emu/soft-fp.h new file mode 100644 index 000000000..a4f72e78f --- /dev/null +++ b/arch/sparc64/math-emu/soft-fp.h @@ -0,0 +1,83 @@ +#ifndef SOFT_FP_H +#define SOFT_FP_H + +#include "sfp-machine.h" + +#define _FP_WORKBITS 3 +#define _FP_WORK_LSB ((_FP_W_TYPE)1 << 3) +#define _FP_WORK_ROUND ((_FP_W_TYPE)1 << 2) +#define _FP_WORK_GUARD ((_FP_W_TYPE)1 << 1) +#define _FP_WORK_STICKY ((_FP_W_TYPE)1 << 0) + +#ifndef FP_RND_NEAREST +# define FP_RND_NEAREST 0 +# define FP_RND_ZERO 1 +# define FP_RND_PINF 2 +# define FP_RND_MINF 3 +# define FP_ROUNDMODE FP_RND_NEAREST +#endif + +#define _FP_ROUND_NEAREST(wc, X) \ + do { \ + if ((_FP_FRAC_LOW_##wc(X) & 15) != _FP_WORK_ROUND) \ + _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND); \ + } while(0) + +#define _FP_ROUND_ZERO(wc, X) + +#define _FP_ROUND_PINF(wc, X) \ + do { \ + if (!X##_s && (_FP_FRAC_LOW_##wc(X) & 7)) \ + _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB); \ + } while (0) + +#define _FP_ROUND_MINF(wc, X) \ + do { \ + if (X##_s && (_FP_FRAC_LOW_##wc(X) & 7)) \ + _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB); \ + } while (0) + +#define _FP_ROUND(wc, X) \ + switch (FP_ROUNDMODE) \ + { \ + case FP_RND_NEAREST: \ + _FP_ROUND_NEAREST(wc,X); \ + break; \ + case FP_RND_ZERO: \ + _FP_ROUND_ZERO(wc,X); \ + break; \ + case FP_RND_PINF: \ + _FP_ROUND_PINF(wc,X); \ + break; \ + case FP_RND_MINF: \ + _FP_ROUND_MINF(wc,X); \ + break; \ + } + +#define FP_CLS_NORMAL 0 +#define FP_CLS_ZERO 1 +#define FP_CLS_INF 2 +#define FP_CLS_NAN 3 + +#define _FP_CLS_COMBINE(x,y) (((x) << 2) | (y)) + +#include "op-1.h" +#include "op-2.h" +#include "op-4.h" +#include "op-common.h" + +/* Sigh. Silly things longlong.h needs. */ +#define UWtype _FP_W_TYPE +#define W_TYPE_SIZE _FP_W_TYPE_SIZE + +typedef int SItype __attribute__((mode(SI))); +typedef int DItype __attribute__((mode(DI))); +typedef unsigned int USItype __attribute__((mode(SI))); +typedef unsigned int UDItype __attribute__((mode(DI))); +#if _FP_W_TYPE_SIZE == 32 +typedef unsigned int UHWtype __attribute__((mode(HI))); +#elif _FP_W_TYPE_SIZE == 64 +typedef USItype UHWtype; +#endif + +#endif diff --git a/arch/sparc64/math-emu/udivmodti4.c b/arch/sparc64/math-emu/udivmodti4.c new file mode 100644 index 000000000..7e112dc1e --- /dev/null +++ b/arch/sparc64/math-emu/udivmodti4.c @@ -0,0 +1,191 @@ +/* This has so very few changes over libgcc2's __udivmoddi4 it isn't funny. */ + +#include "soft-fp.h" + +#undef count_leading_zeros +#define count_leading_zeros __FP_CLZ + +void +_fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2], + _FP_W_TYPE n1, _FP_W_TYPE n0, + _FP_W_TYPE d1, _FP_W_TYPE d0) +{ + _FP_W_TYPE q0, q1, r0, r1; + _FP_I_TYPE b, bm; + + if (d1 == 0) + { +#if !UDIV_NEEDS_NORMALIZATION + if (d0 > n1) + { + /* 0q = nn / 0D */ + + udiv_qrnnd (q0, n0, n1, n0, d0); + q1 = 0; + + /* Remainder in n0. */ + } + else + { + /* qq = NN / 0d */ + + if (d0 == 0) + d0 = 1 / d0; /* Divide intentionally by zero. */ + + udiv_qrnnd (q1, n1, 0, n1, d0); + udiv_qrnnd (q0, n0, n1, n0, d0); + + /* Remainder in n0. */ + } + + r0 = n0; + r1 = 0; + +#else /* UDIV_NEEDS_NORMALIZATION */ + + if (d0 > n1) + { + /* 0q = nn / 0D */ + + count_leading_zeros (bm, d0); + + if (bm != 0) + { + /* Normalize, i.e. make the most significant bit of the + denominator set. */ + + d0 = d0 << bm; + n1 = (n1 << bm) | (n0 >> (_FP_W_TYPE_SIZE - bm)); + n0 = n0 << bm; + } + + udiv_qrnnd (q0, n0, n1, n0, d0); + q1 = 0; + + /* Remainder in n0 >> bm. */ + } + else + { + /* qq = NN / 0d */ + + if (d0 == 0) + d0 = 1 / d0; /* Divide intentionally by zero. */ + + count_leading_zeros (bm, d0); + + if (bm == 0) + { + /* From (n1 >= d0) /\ (the most significant bit of d0 is set), + conclude (the most significant bit of n1 is set) /\ (the + leading quotient digit q1 = 1). + + This special case is necessary, not an optimization. + (Shifts counts of SI_TYPE_SIZE are undefined.) */ + + n1 -= d0; + q1 = 1; + } + else + { + _FP_W_TYPE n2; + + /* Normalize. */ + + b = _FP_W_TYPE_SIZE - bm; + + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd (q1, n1, n2, n1, d0); + } + + /* n1 != d0... */ + + udiv_qrnnd (q0, n0, n1, n0, d0); + + /* Remainder in n0 >> bm. */ + } + + r0 = n0 >> bm; + r1 = 0; +#endif /* UDIV_NEEDS_NORMALIZATION */ + } + else + { + if (d1 > n1) + { + /* 00 = nn / DD */ + + q0 = 0; + q1 = 0; + + /* Remainder in n1n0. */ + r0 = n0; + r1 = n1; + } + else + { + /* 0q = NN / dd */ + + count_leading_zeros (bm, d1); + if (bm == 0) + { + /* From (n1 >= d1) /\ (the most significant bit of d1 is set), + conclude (the most significant bit of n1 is set) /\ (the + quotient digit q0 = 0 or 1). + + This special case is necessary, not an optimization. */ + + /* The condition on the next line takes advantage of that + n1 >= d1 (true due to program flow). */ + if (n1 > d1 || n0 >= d0) + { + q0 = 1; + sub_ddmmss (n1, n0, n1, n0, d1, d0); + } + else + q0 = 0; + + q1 = 0; + + r0 = n0; + r1 = n1; + } + else + { + _FP_W_TYPE m1, m0, n2; + + /* Normalize. */ + + b = _FP_W_TYPE_SIZE - bm; + + d1 = (d1 << bm) | (d0 >> b); + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd (q0, n1, n2, n1, d1); + umul_ppmm (m1, m0, q0, d0); + + if (m1 > n1 || (m1 == n1 && m0 > n0)) + { + q0--; + sub_ddmmss (m1, m0, m1, m0, d1, d0); + } + + q1 = 0; + + /* Remainder in (n1n0 - m1m0) >> bm. */ + sub_ddmmss (n1, n0, n1, n0, m1, m0); + r0 = (n1 << b) | (n0 >> bm); + r1 = n1 >> bm; + } + } + } + + q[0] = q0; q[1] = q1; + r[0] = r0, r[1] = r1; +} |