diff options
Diffstat (limited to 'arch/sparc64/math-emu')
-rw-r--r-- | arch/sparc64/math-emu/Makefile | 10 | ||||
-rw-r--r-- | arch/sparc64/math-emu/double.h | 2 | ||||
-rw-r--r-- | arch/sparc64/math-emu/fabsq.c | 13 | ||||
-rw-r--r-- | arch/sparc64/math-emu/fcmpeq.c | 7 | ||||
-rw-r--r-- | arch/sparc64/math-emu/fcmpq.c | 7 | ||||
-rw-r--r-- | arch/sparc64/math-emu/fnegq.c | 11 | ||||
-rw-r--r-- | arch/sparc64/math-emu/math.c | 27 | ||||
-rw-r--r-- | arch/sparc64/math-emu/op-2.h | 33 | ||||
-rw-r--r-- | arch/sparc64/math-emu/op-4.h | 235 | ||||
-rw-r--r-- | arch/sparc64/math-emu/op-common.h | 52 | ||||
-rw-r--r-- | arch/sparc64/math-emu/quad.h | 73 | ||||
-rw-r--r-- | arch/sparc64/math-emu/single.h | 2 |
12 files changed, 383 insertions, 89 deletions
diff --git a/arch/sparc64/math-emu/Makefile b/arch/sparc64/math-emu/Makefile index ea816d98e..8f695b1e2 100644 --- a/arch/sparc64/math-emu/Makefile +++ b/arch/sparc64/math-emu/Makefile @@ -1,5 +1,5 @@ # -# Makefile for the FPU Quad (long double) instruction emulation. +# Makefile for the FPU instruction emulation. # # Note! Dependencies are done automagically by 'make dep', which also # removes any old dependencies. DON'T put your own dependencies here @@ -16,18 +16,10 @@ O_OBJS := math.o fabsq.o faddq.o fdivq.o fdmulq.o fitoq.o \ fmuls.o fmuld.o fdivs.o fdivd.o fsmuld.o \ fstoi.o fdtoi.o fstox.o fdtox.o fstod.o fdtos.o -ifeq ($(CONFIG_MATHEMU),m) -M_OBJS := $(O_TARGET) -endif - .S.s: $(CPP) -D__ASSEMBLY__ -ansi $< -o $*.s .S.o: $(CC) -D__ASSEMBLY__ -ansi -c $< -o $*.o -ifneq ($(CONFIG_MATHEMU),y) -do_it_all: -endif - include $(TOPDIR)/Rules.make diff --git a/arch/sparc64/math-emu/double.h b/arch/sparc64/math-emu/double.h index b68d76790..6aff6fdd5 100644 --- a/arch/sparc64/math-emu/double.h +++ b/arch/sparc64/math-emu/double.h @@ -3,7 +3,7 @@ */ #if _FP_W_TYPE_SIZE < 32 -#error "Here's a nickle kid. Go buy yourself a real computer." +#error "Here's a nickel kid. Go buy yourself a real computer." 
#endif #if _FP_W_TYPE_SIZE < 64 diff --git a/arch/sparc64/math-emu/fabsq.c b/arch/sparc64/math-emu/fabsq.c index e6aa497c8..e01b02046 100644 --- a/arch/sparc64/math-emu/fabsq.c +++ b/arch/sparc64/math-emu/fabsq.c @@ -1,18 +1,5 @@ -#include "soft-fp.h" -#include "quad.h" - int FABSQ(unsigned long *rd, unsigned long *rs2) { -/* - FP_DECL_Q(A); FP_DECL_Q(R); - - __FP_UNPACK_Q(A, rs2); - _FP_FRAC_COPY_2(R, A); - R_c = A_c; - R_e = A_e; - R_s = 0; - __FP_PACK_Q(rd, R); - */ rd[0] = rs2[0] & 0x7fffffffffffffffUL; rd[1] = rs2[1]; return 1; diff --git a/arch/sparc64/math-emu/fcmpeq.c b/arch/sparc64/math-emu/fcmpeq.c index cb37bc0db..e74b1b06b 100644 --- a/arch/sparc64/math-emu/fcmpeq.c +++ b/arch/sparc64/math-emu/fcmpeq.c @@ -11,11 +11,8 @@ int FCMPEQ(void *rd, void *rs2, void *rs1) rd = (void *)(((long)rd)&~3); __FP_UNPACK_Q(A, rs1); __FP_UNPACK_Q(B, rs2); - FP_CMP_Q(ret, A, B, 3); - switch (ret) { - case 1: ret = 2; break; - case -1: ret = 1; break; - } + FP_CMP_Q(ret, B, A, 3); + if (ret == -1) ret = 2; fsr = *(unsigned long *)rd; switch (fccno) { case 0: fsr &= ~0xc00; fsr |= (ret << 10); break; diff --git a/arch/sparc64/math-emu/fcmpq.c b/arch/sparc64/math-emu/fcmpq.c index 81dadf47a..9effefb1f 100644 --- a/arch/sparc64/math-emu/fcmpq.c +++ b/arch/sparc64/math-emu/fcmpq.c @@ -11,11 +11,8 @@ int FCMPQ(void *rd, void *rs2, void *rs1) rd = (void *)(((long)rd)&~3); __FP_UNPACK_Q(A, rs1); __FP_UNPACK_Q(B, rs2); - FP_CMP_Q(ret, A, B, 3); - switch (ret) { - case 1: ret = 2; break; - case -1: ret = 1; break; - } + FP_CMP_Q(ret, B, A, 3); + if (ret == -1) ret = 2; fsr = *(unsigned long *)rd; switch (fccno) { case 0: fsr &= ~0xc00; fsr |= (ret << 10); break; diff --git a/arch/sparc64/math-emu/fnegq.c b/arch/sparc64/math-emu/fnegq.c index dcdea3202..2251e3308 100644 --- a/arch/sparc64/math-emu/fnegq.c +++ b/arch/sparc64/math-emu/fnegq.c @@ -1,18 +1,7 @@ -#include "soft-fp.h" -#include "quad.h" - int FNEGQ(unsigned long *rd, unsigned long *rs2) { -/* - FP_DECL_Q(A); FP_DECL_Q(R); 
- - __FP_UNPACK_Q(A, rs2); - FP_NEG_Q(R, A); - __FP_PACK_Q(rd, R); - */ rd[0] = rs2[0] ^ 0x8000000000000000UL; rd[1] = rs2[1]; return 1; } - diff --git a/arch/sparc64/math-emu/math.c b/arch/sparc64/math-emu/math.c index 58ed21062..e0380720f 100644 --- a/arch/sparc64/math-emu/math.c +++ b/arch/sparc64/math-emu/math.c @@ -1,4 +1,4 @@ -/* $Id: math.c,v 1.3 1997/10/15 07:28:55 jj Exp $ +/* $Id: math.c,v 1.4 1998/04/06 16:09:57 jj Exp $ * arch/sparc64/math-emu/math.c * * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) @@ -7,7 +7,6 @@ * of glibc and has appropriate copyrights in it. */ -#include <linux/module.h> #include <linux/types.h> #include <linux/sched.h> @@ -70,7 +69,6 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f) if(tstate & TSTATE_PRIV) die_if_kernel("FPQuad from kernel", regs); - MOD_INC_USE_COUNT; if(current->tss.flags & SPARC_FLAG_32BIT) pc = (u32)pc; if (get_user(insn, (u32 *)pc) != -EFAULT) { @@ -182,28 +180,7 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f) func(rd, rs2, rs1); regs->tpc = regs->tnpc; regs->tnpc += 4; - MOD_DEC_USE_COUNT; return 1; } -err: MOD_DEC_USE_COUNT; - return 0; +err: return 0; } - -#ifdef MODULE - -MODULE_AUTHOR("Jakub Jelinek (jj@sunsite.mff.cuni.cz), Richard Henderson (rth@cygnus.com)"); -MODULE_DESCRIPTION("FPU emulation module"); - -extern int (*handle_mathemu)(struct pt_regs *, struct fpustate *); - -int init_module(void) -{ - handle_mathemu = do_mathemu; - return 0; -} - -void cleanup_module(void) -{ - handle_mathemu = NULL; -} -#endif diff --git a/arch/sparc64/math-emu/op-2.h b/arch/sparc64/math-emu/op-2.h index 879b6004f..5999cfc3b 100644 --- a/arch/sparc64/math-emu/op-2.h +++ b/arch/sparc64/math-emu/op-2.h @@ -207,6 +207,12 @@ R##_f1 = _FP_FRAC_WORD_4(_z,1); \ } while (0) +/* This next macro appears to be totally broken. 
Fortunately nowhere + * seems to use it :-> The problem is that we define _z[4] but + * then use it in _FP_FRAC_SRS_4, which will attempt to access + * _z_f[n] which will cause an error. The fix probably involves + * declaring it with _FP_FRAC_DECL_4, see previous macro. -- PMM 02/1998 + */ #define _FP_MUL_MEAT_2_gmp(fs, R, X, Y) \ do { \ _FP_W_TYPE _x[2], _y[2], _z[4]; \ @@ -226,6 +232,11 @@ /* * Division algorithms: + * This seems to be giving me difficulties -- PMM + * Look, NetBSD seems to be able to comment algorithms. Can't you? + * I've thrown printks at the problem. + * This now appears to work, but I still don't really know why. + * Also, I don't think the result is properly normalised... */ #define _FP_DIV_MEAT_2_udiv_64(fs, R, X, Y) \ @@ -236,10 +247,17 @@ _FP_W_TYPE _n_f3, _n_f2, _n_f1, _n_f0, _r_f1, _r_f0; \ _FP_W_TYPE _q_f1, _q_f0, _m_f1, _m_f0; \ _FP_W_TYPE _rmem[2], _qmem[2]; \ - \ + /* I think this check is to ensure that the result is normalised. \ + * Assuming X,Y normalised (ie in [1.0,2.0)) X/Y will be in \ + * [0.5,2.0). Furthermore, it will be less than 1.0 iff X < Y. \ + * In this case we tweak things. (this is based on comments in \ + * the NetBSD FPU emulation code. ) \ + * We know X,Y are normalised because we ensure this as part of \ + * the unpacking process. -- PMM \ + */ \ if (_FP_FRAC_GT_2(X, Y)) \ { \ - R##_e++; \ +/* R##_e++; */ \ _n_f3 = X##_f1 >> 1; \ _n_f2 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ _n_f1 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ @@ -247,14 +265,15 @@ } \ else \ { \ + R##_e--; \ _n_f3 = X##_f1; \ _n_f2 = X##_f0; \ _n_f1 = _n_f0 = 0; \ } \ \ /* Normalize, i.e. make the most significant bit of the \ - denominator set. */ \ - _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs - 1); \ + denominator set. CHANGED: - 1 to nothing -- PMM */ \ + _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs /* -1 */); \ \ /* Do the 256/128 bit division given the 128-bit _fp_udivmodtf4 \ primitive snagged from libgcc2.c. 
*/ \ @@ -295,6 +314,11 @@ \ R##_f1 = _q_f1; \ R##_f0 = _q_f0 | ((_r_f1 | _r_f0) != 0); \ + /* adjust so answer is normalized again. I'm not sure what the \ + * final sz param should be. In practice it's never used since \ + * N is 1 which is always going to be < _FP_W_TYPE_SIZE... \ + */ \ + /* _FP_FRAC_SRS_2(R,1,_FP_WFRACBITS_##fs); */ \ } while (0) @@ -406,3 +430,4 @@ D##_f1 = 0; \ _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ } while (0) + diff --git a/arch/sparc64/math-emu/op-4.h b/arch/sparc64/math-emu/op-4.h index 2f917a847..5f7099271 100644 --- a/arch/sparc64/math-emu/op-4.h +++ b/arch/sparc64/math-emu/op-4.h @@ -1,11 +1,41 @@ /* * Basic four-word fraction declaration and manipulation. + * + * When adding quadword support for 32 bit machines, we need + * to be a little careful as double multiply uses some of these + * macros: (in op-2.h) + * _FP_MUL_MEAT_2_wide() uses _FP_FRAC_DECL_4, _FP_FRAC_WORD_4, + * _FP_FRAC_ADD_4, _FP_FRAC_SRS_4 + * _FP_MUL_MEAT_2_gmp() uses _FP_FRAC_SRS_4 (and should use + * _FP_FRAC_DECL_4: it appears to be broken and is not used + * anywhere anyway. ) + * + * I've now fixed all the macros that were here from the sparc64 code. + * [*none* of the shift macros were correct!] -- PMM 02/1998 + * + * The only quadword stuff that remains to be coded is: + * 1) the conversion to/from ints, which requires + * that we check (in op-common.h) that the following do the right thing + * for quadwords: _FP_TO_INT(Q,4,r,X,rsz,rsg), _FP_FROM_INT(Q,4,X,r,rs,rt) + * 2) multiply, divide and sqrt, which require: + * _FP_MUL_MEAT_4_*(R,X,Y), _FP_DIV_MEAT_4_*(R,X,Y), _FP_SQRT_MEAT_4(R,S,T,X,q), + * This also needs _FP_MUL_MEAT_Q and _FP_DIV_MEAT_Q to be defined to + * some suitable _FP_MUL_MEAT_4_* macros in sfp-machine.h. + * [we're free to choose whatever FP_MUL_MEAT_4_* macros we need for + * these; they are used nowhere else. 
] */ #define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4] #define _FP_FRAC_COPY_4(D,S) \ (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \ D##_f[2] = S##_f[2], D##_f[3] = S##_f[3]) +/* The _FP_FRAC_SET_n(X,I) macro is intended for use with another + * macro such as _FP_ZEROFRAC_n which returns n comma separated values. + * The result is that we get an expansion of __FP_FRAC_SET_n(X,I0,I1,I2,I3) + * which just assigns the In values to the array X##_f[]. + * This is why the number of parameters doesn't appear to match + * at first glance... -- PMM + */ #define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I) #define _FP_FRAC_HIGH_4(X) (X##_f[3]) #define _FP_FRAC_LOW_4(X) (X##_f[0]) @@ -19,26 +49,32 @@ _down = _FP_W_TYPE_SIZE - _up; \ for (_i = 3; _i > _skip; --_i) \ X##_f[_i] = X##_f[_i-_skip] << _up | X##_f[_i-_skip-1] >> _down; \ - X##_f[_i] <<= _up; \ +/* bugfixed: was X##_f[_i] <<= _up; -- PMM 02/1998 */ \ + X##_f[_i] = X##_f[0] << _up; \ for (--_i; _i >= 0; --_i) \ X##_f[_i] = 0; \ } while (0) +/* This one was broken too */ #define _FP_FRAC_SRL_4(X,N) \ do { \ _FP_I_TYPE _up, _down, _skip, _i; \ _skip = (N) / _FP_W_TYPE_SIZE; \ _down = (N) % _FP_W_TYPE_SIZE; \ _up = _FP_W_TYPE_SIZE - _down; \ - for (_i = 0; _i < 4-_skip; ++_i) \ + for (_i = 0; _i < 3-_skip; ++_i) \ X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \ - X##_f[_i] >>= _down; \ + X##_f[_i] = X##_f[3] >> _down; \ for (++_i; _i < 4; ++_i) \ X##_f[_i] = 0; \ } while (0) -/* Right shift with sticky-lsb. */ +/* Right shift with sticky-lsb. + * What this actually means is that we do a standard right-shift, + * but that if any of the bits that fall off the right hand side + * were one then we always set the LSbit. 
+ */ #define _FP_FRAC_SRS_4(X,N,size) \ do { \ _FP_I_TYPE _up, _down, _skip, _i; \ @@ -48,13 +84,15 @@ _up = _FP_W_TYPE_SIZE - _down; \ for (_s = _i = 0; _i < _skip; ++_i) \ _s |= X##_f[_i]; \ - _s = X##_f[_i] << _up; \ - X##_f[0] = X##_f[_skip] >> _down | X##_f[_skip+1] << _up | (_s != 0); \ - for (_i = 1; _i < 4-_skip; ++_i) \ + _s |= X##_f[_i] << _up; \ +/* s is now != 0 if we want to set the LSbit */ \ + for (_i = 0; _i < 3-_skip; ++_i) \ X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \ - X##_f[_i] >>= _down; \ + X##_f[_i] = X##_f[3] >> _down; \ for (++_i; _i < 4; ++_i) \ X##_f[_i] = 0; \ + /* don't fix the LSB until the very end when we're sure f[0] is stable */ \ + X##_f[0] |= (_s != 0); \ } while (0) #define _FP_FRAC_ADD_4(R,X,Y) \ @@ -62,6 +100,92 @@ X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) +#define _FP_FRAC_SUB_4(R,X,Y) \ + __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ + X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ + Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) + +#define _FP_FRAC_ADDI_4(X,I) \ + __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I) + +#define _FP_ZEROFRAC_4 0,0,0,0 +#define _FP_MINFRAC_4 0,0,0,1 + +#define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0) +#define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0) +#define _FP_FRAC_OVERP_4(fs,X) (X##_f[0] & _FP_OVERFLOW_##fs) + +#define _FP_FRAC_EQ_4(X,Y) \ + (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \ + && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3]) + +#define _FP_FRAC_GT_4(X,Y) \ + (X##_f[3] > Y##_f[3] || \ + (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ + (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ + (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \ + )) \ + )) \ + ) + +#define _FP_FRAC_GE_4(X,Y) \ + (X##_f[3] > Y##_f[3] || \ + (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ + (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ + (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \ + )) 
\ + )) \ + ) + + +#define _FP_FRAC_CLZ_4(R,X) \ + do { \ + if (X##_f[3]) \ + { \ + __FP_CLZ(R,X##_f[3]); \ + } \ + else if (X##_f[2]) \ + { \ + __FP_CLZ(R,X##_f[2]); \ + R += _FP_W_TYPE_SIZE; \ + } \ + else if (X##_f[1]) \ + { \ + __FP_CLZ(R,X##_f[2]); \ + R += _FP_W_TYPE_SIZE*2; \ + } \ + else \ + { \ + __FP_CLZ(R,X##_f[0]); \ + R += _FP_W_TYPE_SIZE*3; \ + } \ + } while(0) + + +#define _FP_UNPACK_RAW_4(fs, X, val) \ + do { \ + union _FP_UNION_##fs _flo; _flo.flt = (val); \ + X##_f[0] = _flo.bits.frac0; \ + X##_f[1] = _flo.bits.frac1; \ + X##_f[2] = _flo.bits.frac2; \ + X##_f[3] = _flo.bits.frac3; \ + X##_e = _flo.bits.exp; \ + X##_s = _flo.bits.sign; \ + } while (0) + +#define _FP_PACK_RAW_4(fs, val, X) \ + do { \ + union _FP_UNION_##fs _flo; \ + _flo.bits.frac0 = X##_f[0]; \ + _flo.bits.frac1 = X##_f[1]; \ + _flo.bits.frac2 = X##_f[2]; \ + _flo.bits.frac3 = X##_f[3]; \ + _flo.bits.exp = X##_e; \ + _flo.bits.sign = X##_s; \ + (val) = _flo.flt; \ + } while (0) + + /* * Internals */ @@ -76,3 +200,98 @@ r2 = x2 + y2 + (r1 < x1), \ r3 = x3 + y3 + (r2 < x2)) #endif + +#ifndef __FP_FRAC_SUB_4 +#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ + (r0 = x0 - y0, \ + r1 = x1 - y1 - (r0 > x0), \ + r2 = x2 - y2 - (r1 > x1), \ + r3 = x3 - y3 - (r2 > x2)) +#endif + +#ifndef __FP_FRAC_ADDI_4 +/* I always wanted to be a lisp programmer :-> */ +#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \ + (x3 += ((x2 += ((x1 += ((x0 += i) < x0)) < x1) < x2))) +#endif + +/* Convert FP values between word sizes. This appears to be more + * complicated than I'd have expected it to be, so these might be + * wrong... These macros are in any case somewhat bogus because they + * use information about what various FRAC_n variables look like + * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do + * the ones in op-2.h and op-1.h. 
+ */ +#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S) \ + do { \ + _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ + _FP_WFRACBITS_##sfs); \ + D##_f = S##_f[0]; \ + } while (0) + +#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S) \ + do { \ + _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ + _FP_WFRACBITS_##sfs); \ + D##_f0 = S##_f[0]; \ + D##_f1 = S##_f[1]; \ + } while (0) + +/* Assembly/disassembly for converting to/from integral types. + * No shifting or overflow handled here. + */ +/* Put the FP value X into r, which is an integer of size rsize. */ +#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \ + do { \ + if (rsize <= _FP_W_TYPE_SIZE) \ + r = X##_f[0]; \ + else if (rsize <= 2*_FP_W_TYPE_SIZE) \ + { \ + r = X##_f[1]; \ + r <<= _FP_W_TYPE_SIZE; \ + r += X##_f[0]; \ + } \ + else \ + { \ + /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \ + /* and int == 4words as a single case. */ \ + r = X##_f[3]; \ + r <<= _FP_W_TYPE_SIZE; \ + r += X##_f[2]; \ + r <<= _FP_W_TYPE_SIZE; \ + r += X##_f[1]; \ + r <<= _FP_W_TYPE_SIZE; \ + r += X##_f[0]; \ + } \ + } while (0) + +/* "No disassemble Number Five!" */ +/* move an integer of size rsize into X's fractional part. We rely on + * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid + * having to mask the values we store into it. + */ +#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \ + do { \ + X##_f[0] = r; \ + X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ + X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \ + X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 
0 : r >> 3*_FP_W_TYPE_SIZE); \ + } while (0); + +#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S) \ + do { \ + D##_f[0] = S##_f; \ + D##_f[1] = D##_f[2] = D##_f[3] = 0; \ + _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ + } while (0) + +#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S) \ + do { \ + D##_f[0] = S##_f0; \ + D##_f[1] = S##_f1; \ + D##_f[2] = D##_f[3] = 0; \ + _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ + } while (0) + +/* FIXME! This has to be written */ +#define _FP_SQRT_MEAT_4(R, S, T, X, q) diff --git a/arch/sparc64/math-emu/op-common.h b/arch/sparc64/math-emu/op-common.h index 8123e4c46..d4ce104f6 100644 --- a/arch/sparc64/math-emu/op-common.h +++ b/arch/sparc64/math-emu/op-common.h @@ -1,3 +1,4 @@ + #define _FP_DECL(wc, X) \ _FP_I_TYPE X##_c, X##_s, X##_e; \ _FP_FRAC_DECL_##wc(X) @@ -507,6 +508,29 @@ do { \ * Convert from FP to integer */ +/* "When a NaN, infinity, large positive argument >= 2147483648.0, or + * large negative argument <= -2147483649.0 is converted to an integer, + * the invalid_current bit...should be set and fp_exception_IEEE_754 should + * be raised. If the floating point invalid trap is disabled, no trap occurs + * and a numerical result is generated: if the sign bit of the operand + * is 0, the result is 2147483647; if the sign bit of the operand is 1, + * the result is -2147483648." + * Similarly for conversion to extended ints, except that the boundaries + * are >= 2^63, <= -(2^63 + 1), and the results are 2^63 - 1 for s=0 and + * -2^63 for s=1. + * -- SPARC Architecture Manual V9, Appendix B, which specifies how + * SPARCs resolve implementation dependencies in the IEEE-754 spec. + * I don't believe that the code below follows this. I'm not even sure + * it's right! + * It doesn't cope with needing to convert to an n bit integer when there + * is no n bit integer type. Fortunately gcc provides long long so this + * isn't a problem for sparc32. 
+ * I have, however, fixed its NaN handling to conform as above. + * -- PMM 02/1998 + * NB: rsigned is not 'is r declared signed?' but 'should the value stored + * in r be signed or unsigned?'. r is always(?) declared unsigned. + * Comments below are mine, BTW -- PMM + */ #define _FP_TO_INT(fs, wc, r, X, rsize, rsigned) \ do { \ switch (X##_c) \ @@ -514,13 +538,14 @@ do { \ case FP_CLS_NORMAL: \ if (X##_e < 0) \ { \ - case FP_CLS_NAN: \ + /* case FP_CLS_NAN: see above! */ \ case FP_CLS_ZERO: \ r = 0; \ } \ else if (X##_e >= rsize - (rsigned != 0)) \ - { \ - case FP_CLS_INF: \ + { /* overflow */ \ + case FP_CLS_NAN: \ + case FP_CLS_INF: \ if (rsigned) \ { \ r = 1; \ @@ -604,6 +629,23 @@ do { \ /* Count leading zeros in a word. */ #ifndef __FP_CLZ +#if _FP_W_TYPE_SIZE < 64 +/* this is just to shut the compiler up about shifts > word length -- PMM 02/1998 */ +#define __FP_CLZ(r, x) \ + do { \ + _FP_W_TYPE _t = (x); \ + r = _FP_W_TYPE_SIZE - 1; \ + if (_t > 0xffff) r -= 16; \ + if (_t > 0xffff) _t >>= 16; \ + if (_t > 0xff) r -= 8; \ + if (_t > 0xff) _t >>= 8; \ + if (_t & 0xf0) r -= 4; \ + if (_t & 0xf0) _t >>= 4; \ + if (_t & 0xc) r -= 2; \ + if (_t & 0xc) _t >>= 2; \ + if (_t & 0x2) r -= 1; \ + } while (0) +#else /* not _FP_W_TYPE_SIZE < 64 */ #define __FP_CLZ(r, x) \ do { \ _FP_W_TYPE _t = (x); \ @@ -620,9 +662,11 @@ do { \ if (_t & 0xc) _t >>= 2; \ if (_t & 0x2) r -= 1; \ } while (0) -#endif +#endif /* not _FP_W_TYPE_SIZE < 64 */ +#endif /* ndef __FP_CLZ */ #define _FP_DIV_HELP_imm(q, r, n, d) \ do { \ q = n / d, r = n % d; \ } while (0) + diff --git a/arch/sparc64/math-emu/quad.h b/arch/sparc64/math-emu/quad.h index dfc3b4eea..48fcc798c 100644 --- a/arch/sparc64/math-emu/quad.h +++ b/arch/sparc64/math-emu/quad.h @@ -1,12 +1,17 @@ /* * Definitions for IEEE Quad Precision */ - -#if _FP_W_TYPE_SIZE < 64 -#error "Only stud muffins allowed, schmuck." +#if _FP_W_TYPE_SIZE < 32 +/* It appears to be traditional to abuse 16bitters in these header files... 
*/ +#error "Here's a nickel, kid. Go buy yourself a real computer." #endif +#if _FP_W_TYPE_SIZE < 64 +/* This is all terribly experimental and I don't know if it'll work properly -- PMM 02/1998 */ +#define _FP_FRACTBITS_Q (4*_FP_W_TYPE_SIZE) +#else #define _FP_FRACTBITS_Q (2*_FP_W_TYPE_SIZE) +#endif #define _FP_FRACBITS_Q 113 #define _FP_FRACXBITS_Q (_FP_FRACTBITS_Q - _FP_FRACBITS_Q) @@ -23,6 +28,66 @@ #define _FP_OVERFLOW_Q \ ((_FP_W_TYPE)1 << (_FP_WFRACBITS_Q % _FP_W_TYPE_SIZE)) +#if _FP_W_TYPE_SIZE < 64 + +union _FP_UNION_Q +{ + long double flt; + struct + { +#if __BYTE_ORDER == __BIG_ENDIAN + unsigned sign : 1; + unsigned exp : _FP_EXPBITS_Q; + unsigned long frac3 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0)-(_FP_W_TYPE_SIZE * 3); + unsigned long frac2 : _FP_W_TYPE_SIZE; + unsigned long frac1 : _FP_W_TYPE_SIZE; + unsigned long frac0 : _FP_W_TYPE_SIZE; +#else + unsigned long frac0 : _FP_W_TYPE_SIZE; + unsigned long frac1 : _FP_W_TYPE_SIZE; + unsigned long frac2 : _FP_W_TYPE_SIZE; + unsigned long frac3 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0)-(_FP_W_TYPE_SIZE * 3); + unsigned exp : _FP_EXPBITS_Q; + unsigned sign : 1; +#endif /* not bigendian */ + } bits __attribute__((packed)); +}; + + +#define FP_DECL_Q(X) _FP_DECL(4,X) +#define FP_UNPACK_RAW_Q(X,val) _FP_UNPACK_RAW_4(Q,X,val) +#define FP_PACK_RAW_Q(val,X) _FP_PACK_RAW_4(Q,val,X) + +#define FP_UNPACK_Q(X,val) \ + do { \ + _FP_UNPACK_RAW_4(Q,X,val); \ + _FP_UNPACK_CANONICAL(Q,4,X); \ + } while (0) + +#define FP_PACK_Q(val,X) \ + do { \ + _FP_PACK_CANONICAL(Q,4,X); \ + _FP_PACK_RAW_4(Q,val,X); \ + } while (0) + +#define FP_NEG_Q(R,X) _FP_NEG(Q,4,R,X) +#define FP_ADD_Q(R,X,Y) _FP_ADD(Q,4,R,X,Y) +/* single.h and double.h define FP_SUB_t this way too. However, _FP_SUB is + * never defined in op-common.h! 
Fortunately nobody seems to use the FP_SUB_t + * macros: I suggest a combination of FP_NEG and FP_ADD :-> -- PMM 02/1998 + */ +#define FP_SUB_Q(R,X,Y) _FP_SUB(Q,4,R,X,Y) +#define FP_MUL_Q(R,X,Y) _FP_MUL(Q,4,R,X,Y) +#define FP_DIV_Q(R,X,Y) _FP_DIV(Q,4,R,X,Y) +#define FP_SQRT_Q(R,X) _FP_SQRT(Q,4,R,X) + +#define FP_CMP_Q(r,X,Y,un) _FP_CMP(Q,4,r,X,Y,un) +#define FP_CMP_EQ_Q(r,X,Y) _FP_CMP_EQ(Q,4,r,X,Y) + +#define FP_TO_INT_Q(r,X,rsz,rsg) _FP_TO_INT(Q,4,r,X,rsz,rsg) +#define FP_FROM_INT_Q(X,r,rs,rt) _FP_FROM_INT(Q,4,X,r,rs,rt) + +#else /* not _FP_W_TYPE_SIZE < 64 */ union _FP_UNION_Q { long double flt /* __attribute__((mode(TF))) */ ; @@ -69,3 +134,5 @@ union _FP_UNION_Q #define FP_TO_INT_Q(r,X,rsz,rsg) _FP_TO_INT(Q,2,r,X,rsz,rsg) #define FP_FROM_INT_Q(X,r,rs,rt) _FP_FROM_INT(Q,2,X,r,rs,rt) + +#endif /* not _FP_W_TYPE_SIZE < 64 */ diff --git a/arch/sparc64/math-emu/single.h b/arch/sparc64/math-emu/single.h index fa7f386cd..f19d99451 100644 --- a/arch/sparc64/math-emu/single.h +++ b/arch/sparc64/math-emu/single.h @@ -3,7 +3,7 @@ */ #if _FP_W_TYPE_SIZE < 32 -#error "Here's a nickle kid. Go buy yourself a real computer." +#error "Here's a nickel kid. Go buy yourself a real computer." #endif #define _FP_FRACBITS_S 24 |