12 files changed, 383 insertions, 89 deletions
diff --git a/arch/sparc64/math-emu/Makefile b/arch/sparc64/math-emu/Makefile
index ea816d98e..8f695b1e2 100644
--- a/arch/sparc64/math-emu/Makefile
+++ b/arch/sparc64/math-emu/Makefile
@@ -1,5 +1,5 @@
 #
-# Makefile for the FPU Quad (long double) instruction emulation.
+# Makefile for the FPU instruction emulation.
 #
 # Note! Dependencies are done automagically by 'make dep', which also
 # removes any old dependencies. DON'T put your own dependencies here
@@ -16,18 +16,10 @@ O_OBJS   := math.o fabsq.o faddq.o fdivq.o fdmulq.o fitoq.o 		\
 		fmuls.o fmuld.o fdivs.o fdivd.o fsmuld.o		\
 		fstoi.o fdtoi.o fstox.o fdtox.o fstod.o fdtos.o
 
-ifeq ($(CONFIG_MATHEMU),m)
-M_OBJS   := $(O_TARGET)
-endif
-
 .S.s:
 	$(CPP) -D__ASSEMBLY__ -ansi $< -o $*.s
 
 .S.o:
 	$(CC) -D__ASSEMBLY__ -ansi -c $< -o $*.o
 
-ifneq ($(CONFIG_MATHEMU),y)
-do_it_all:
-endif
-
 include $(TOPDIR)/Rules.make
diff --git a/arch/sparc64/math-emu/double.h b/arch/sparc64/math-emu/double.h
index b68d76790..6aff6fdd5 100644
--- a/arch/sparc64/math-emu/double.h
+++ b/arch/sparc64/math-emu/double.h
@@ -3,7 +3,7 @@
  */
 
 #if _FP_W_TYPE_SIZE < 32
-#error "Here's a nickle kid.  Go buy yourself a real computer."
+#error "Here's a nickel kid.  Go buy yourself a real computer."
 #endif
 
 #if _FP_W_TYPE_SIZE < 64
diff --git a/arch/sparc64/math-emu/fabsq.c b/arch/sparc64/math-emu/fabsq.c
index e6aa497c8..e01b02046 100644
--- a/arch/sparc64/math-emu/fabsq.c
+++ b/arch/sparc64/math-emu/fabsq.c
@@ -1,18 +1,5 @@
-#include "soft-fp.h"
-#include "quad.h"
-
 int FABSQ(unsigned long *rd, unsigned long *rs2)
 {
-/*
-	FP_DECL_Q(A); FP_DECL_Q(R);
-
-	__FP_UNPACK_Q(A, rs2);
-	_FP_FRAC_COPY_2(R, A);
-	R_c = A_c;
-	R_e = A_e;
-	R_s = 0;
-	__FP_PACK_Q(rd, R);
- */
 	rd[0] = rs2[0] & 0x7fffffffffffffffUL;
 	rd[1] = rs2[1];
 	return 1;
diff --git a/arch/sparc64/math-emu/fcmpeq.c b/arch/sparc64/math-emu/fcmpeq.c
index cb37bc0db..e74b1b06b 100644
--- a/arch/sparc64/math-emu/fcmpeq.c
+++ b/arch/sparc64/math-emu/fcmpeq.c
@@ -11,11 +11,8 @@ int FCMPEQ(void *rd, void *rs2, void *rs1)
 	rd = (void *)(((long)rd)&~3);
 	__FP_UNPACK_Q(A, rs1);
 	__FP_UNPACK_Q(B, rs2);
-	FP_CMP_Q(ret, A, B, 3);
-	switch (ret) {
-	case 1: ret = 2; break;
-	case -1: ret = 1; break;
-	}
+	FP_CMP_Q(ret, B, A, 3);
+	if (ret == -1) ret = 2;
 	fsr = *(unsigned long *)rd;
 	switch (fccno) {
 	case 0: fsr &= ~0xc00; fsr |= (ret << 10); break;
diff --git a/arch/sparc64/math-emu/fcmpq.c b/arch/sparc64/math-emu/fcmpq.c
index 81dadf47a..9effefb1f 100644
--- a/arch/sparc64/math-emu/fcmpq.c
+++ b/arch/sparc64/math-emu/fcmpq.c
@@ -11,11 +11,8 @@ int FCMPQ(void *rd, void *rs2, void *rs1)
 	rd = (void *)(((long)rd)&~3);
 	__FP_UNPACK_Q(A, rs1);
 	__FP_UNPACK_Q(B, rs2);
-	FP_CMP_Q(ret, A, B, 3);
-	switch (ret) {
-	case 1: ret = 2; break;
-	case -1: ret = 1; break;
-	}
+	FP_CMP_Q(ret, B, A, 3);
+	if (ret == -1) ret = 2;
 	fsr = *(unsigned long *)rd;
 	switch (fccno) {
 	case 0: fsr &= ~0xc00; fsr |= (ret << 10); break;
diff --git a/arch/sparc64/math-emu/fnegq.c b/arch/sparc64/math-emu/fnegq.c
index dcdea3202..2251e3308 100644
--- a/arch/sparc64/math-emu/fnegq.c
+++ b/arch/sparc64/math-emu/fnegq.c
@@ -1,18 +1,7 @@
-#include "soft-fp.h"
-#include "quad.h"
-
 int FNEGQ(unsigned long *rd, unsigned long *rs2)
 {
-/*
-	FP_DECL_Q(A); FP_DECL_Q(R);
-
-	__FP_UNPACK_Q(A, rs2);
-	FP_NEG_Q(R, A);
-	__FP_PACK_Q(rd, R);
- */
 	rd[0] = rs2[0] ^ 0x8000000000000000UL;
 	rd[1] = rs2[1];
 	return 1;
 }
 
-                
diff --git a/arch/sparc64/math-emu/math.c b/arch/sparc64/math-emu/math.c
index 58ed21062..e0380720f 100644
--- a/arch/sparc64/math-emu/math.c
+++ b/arch/sparc64/math-emu/math.c
@@ -1,4 +1,4 @@
-/* $Id: math.c,v 1.3 1997/10/15 07:28:55 jj Exp $
+/* $Id: math.c,v 1.4 1998/04/06 16:09:57 jj Exp $
  * arch/sparc64/math-emu/math.c
  *
  * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
@@ -7,7 +7,6 @@
  * of glibc and has appropriate copyrights in it.
  */
 
-#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/sched.h>
 
@@ -70,7 +69,6 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f)
 
 	if(tstate & TSTATE_PRIV)
 		die_if_kernel("FPQuad from kernel", regs);
-	MOD_INC_USE_COUNT;
 	if(current->tss.flags & SPARC_FLAG_32BIT)
 		pc = (u32)pc;
 	if (get_user(insn, (u32 *)pc) != -EFAULT) {
@@ -182,28 +180,7 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f)
 		func(rd, rs2, rs1);
 		regs->tpc = regs->tnpc;
 		regs->tnpc += 4;
-		MOD_DEC_USE_COUNT;
 		return 1;
 	}
-err:	MOD_DEC_USE_COUNT;
-	return 0;
+err:	return 0;
 }
-
-#ifdef MODULE
-
-MODULE_AUTHOR("Jakub Jelinek (jj@sunsite.mff.cuni.cz), Richard Henderson (rth@cygnus.com)");
-MODULE_DESCRIPTION("FPU emulation module");
-
-extern int (*handle_mathemu)(struct pt_regs *, struct fpustate *);
-
-int init_module(void)
-{
-	handle_mathemu = do_mathemu;
-	return 0;
-}
-
-void cleanup_module(void)
-{
-	handle_mathemu = NULL;
-}
-#endif
diff --git a/arch/sparc64/math-emu/op-2.h b/arch/sparc64/math-emu/op-2.h
index 879b6004f..5999cfc3b 100644
--- a/arch/sparc64/math-emu/op-2.h
+++ b/arch/sparc64/math-emu/op-2.h
@@ -207,6 +207,12 @@
     R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
   } while (0)
 
+/* This next macro appears to be totally broken. Fortunately nothing
+ * seems to use it :-> The problem is that we declare _z[4] but
+ * then pass it to _FP_FRAC_SRS_4, which will attempt to access
+ * _z_f[n], which will cause an error. The fix probably involves
+ * declaring it with _FP_FRAC_DECL_4, see previous macro. -- PMM 02/1998
+ */
 #define _FP_MUL_MEAT_2_gmp(fs, R, X, Y)					\
   do {									\
     _FP_W_TYPE _x[2], _y[2], _z[4];					\
@@ -226,6 +232,11 @@
 
 /*
  * Division algorithms:
+ * This seems to be giving me difficulties -- PMM 
+ * Look, NetBSD seems to be able to comment algorithms. Can't you?
+ * I've thrown printks at the problem.
+ * This now appears to work, but I still don't really know why.
+ * Also, I don't think the result is properly normalised...
  */
 
 #define _FP_DIV_MEAT_2_udiv_64(fs, R, X, Y)				\
@@ -236,10 +247,17 @@
     _FP_W_TYPE _n_f3, _n_f2, _n_f1, _n_f0, _r_f1, _r_f0;		\
     _FP_W_TYPE _q_f1, _q_f0, _m_f1, _m_f0;				\
     _FP_W_TYPE _rmem[2], _qmem[2];					\
-									\
+    /* I think this check is to ensure that the result is normalised.   \
+     * Assuming X,Y normalised (ie in [1.0,2.0)) X/Y will be in         \
+     * [0.5,2.0). Furthermore, it will be less than 1.0 iff X < Y.      \
+     * In this case we tweak things. (this is based on comments in      \
+     * the NetBSD FPU emulation code. )                                 \
+     * We know X,Y are normalised because we ensure this as part of     \
+     * the unpacking process. -- PMM                                    \
+     */									\
     if (_FP_FRAC_GT_2(X, Y))						\
       {									\
-	R##_e++;							\
+/*	R##_e++; */							\
 	_n_f3 = X##_f1 >> 1;						\
 	_n_f2 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;		\
 	_n_f1 = X##_f0 << (_FP_W_TYPE_SIZE - 1);			\
@@ -247,14 +265,15 @@
       }									\
     else								\
       {									\
+	R##_e--;							\
 	_n_f3 = X##_f1;							\
 	_n_f2 = X##_f0;							\
 	_n_f1 = _n_f0 = 0;						\
       }									\
 									\
     /* Normalize, i.e. make the most significant bit of the 		\
-       denominator set.  */						\
-    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs - 1);				\
+       denominator set.  CHANGED: - 1 to nothing -- PMM */		\
+    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs /* -1 */);			\
 									\
     /* Do the 256/128 bit division given the 128-bit _fp_udivmodtf4 	\
        primitive snagged from libgcc2.c.  */				\
@@ -295,6 +314,11 @@
 									\
     R##_f1 = _q_f1;							\
     R##_f0 = _q_f0 | ((_r_f1 | _r_f0) != 0);				\
+    /* adjust so answer is normalized again. I'm not sure what the 	\
+     * final sz param should be. In practice it's never used since      \
+     * N is 1 which is always going to be < _FP_W_TYPE_SIZE...		\
+     */									\
+    /* _FP_FRAC_SRS_2(R,1,_FP_WFRACBITS_##fs);	*/			\
   } while (0)
 
 
@@ -406,3 +430,4 @@
     D##_f1 = 0;								\
     _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));	\
   } while (0)
+
diff --git a/arch/sparc64/math-emu/op-4.h b/arch/sparc64/math-emu/op-4.h
index 2f917a847..5f7099271 100644
--- a/arch/sparc64/math-emu/op-4.h
+++ b/arch/sparc64/math-emu/op-4.h
@@ -1,11 +1,41 @@
 /*
  * Basic four-word fraction declaration and manipulation.
+ *
+ * When adding quadword support for 32 bit machines, we need
+ * to be a little careful as double multiply uses some of these
+ * macros: (in op-2.h)
+ * _FP_MUL_MEAT_2_wide() uses _FP_FRAC_DECL_4, _FP_FRAC_WORD_4,
+ * _FP_FRAC_ADD_4, _FP_FRAC_SRS_4
+ * _FP_MUL_MEAT_2_gmp() uses _FP_FRAC_SRS_4 (and should use
+ * _FP_FRAC_DECL_4: it appears to be broken and is not used 
+ * anywhere anyway. )
+ *
+ * I've now fixed all the macros that were here from the sparc64 code.
+ * [*none* of the shift macros were correct!] -- PMM 02/1998
+ * 
+ * The only quadword stuff that remains to be coded is: 
+ * 1) the conversion to/from ints, which requires 
+ * that we check (in op-common.h) that the following do the right thing
+ * for quadwords: _FP_TO_INT(Q,4,r,X,rsz,rsg), _FP_FROM_INT(Q,4,X,r,rs,rt)
+ * 2) multiply, divide and sqrt, which require:
+ * _FP_MUL_MEAT_4_*(R,X,Y), _FP_DIV_MEAT_4_*(R,X,Y), _FP_SQRT_MEAT_4(R,S,T,X,q),
+ * This also needs _FP_MUL_MEAT_Q and _FP_DIV_MEAT_Q to be defined to
+ * some suitable _FP_MUL_MEAT_4_* macros in sfp-machine.h.
+ * [we're free to choose whatever FP_MUL_MEAT_4_* macros we need for
+ * these; they are used nowhere else. ]
  */
 
 #define _FP_FRAC_DECL_4(X)	_FP_W_TYPE X##_f[4]
 #define _FP_FRAC_COPY_4(D,S)			\
   (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],	\
    D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
+/* The _FP_FRAC_SET_n(X,I) macro is intended for use with another
+ * macro such as _FP_ZEROFRAC_n which returns n comma separated values.
+ * The result is that we get an expansion of __FP_FRAC_SET_n(X,I0,I1,I2,I3)
+ * which just assigns the In values to the array X##_f[]. 
+ * This is why the number of parameters doesn't appear to match
+ * at first glance...      -- PMM 
+ */
 #define _FP_FRAC_SET_4(X,I)	__FP_FRAC_SET_4(X, I)
 #define _FP_FRAC_HIGH_4(X)	(X##_f[3])
 #define _FP_FRAC_LOW_4(X)	(X##_f[0])
@@ -19,26 +49,32 @@
     _down = _FP_W_TYPE_SIZE - _up;					\
     for (_i = 3; _i > _skip; --_i)					\
       X##_f[_i] = X##_f[_i-_skip] << _up | X##_f[_i-_skip-1] >> _down;	\
-    X##_f[_i] <<= _up;							\
+/* bugfixed: was X##_f[_i] <<= _up;  -- PMM 02/1998 */                  \
+    X##_f[_i] = X##_f[0] << _up; 	                                \
     for (--_i; _i >= 0; --_i)						\
       X##_f[_i] = 0;							\
   } while (0)
 
+/* This one was broken too */
 #define _FP_FRAC_SRL_4(X,N)						\
   do {									\
     _FP_I_TYPE _up, _down, _skip, _i;					\
     _skip = (N) / _FP_W_TYPE_SIZE;					\
     _down = (N) % _FP_W_TYPE_SIZE;					\
     _up = _FP_W_TYPE_SIZE - _down;					\
-    for (_i = 0; _i < 4-_skip; ++_i)					\
+    for (_i = 0; _i < 3-_skip; ++_i)					\
       X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up;	\
-    X##_f[_i] >>= _down;						\
+    X##_f[_i] = X##_f[3] >> _down;			         	\
     for (++_i; _i < 4; ++_i)						\
       X##_f[_i] = 0;							\
   } while (0)
 
 
-/* Right shift with sticky-lsb.  */
+/* Right shift with sticky-lsb. 
+ * What this actually means is that we do a standard right-shift,
+ * but that if any of the bits that fall off the right hand side
+ * were one then we always set the LSbit.
+ */
 #define _FP_FRAC_SRS_4(X,N,size)					\
   do {									\
     _FP_I_TYPE _up, _down, _skip, _i;					\
@@ -48,13 +84,15 @@
     _up = _FP_W_TYPE_SIZE - _down;					\
     for (_s = _i = 0; _i < _skip; ++_i)					\
       _s |= X##_f[_i];							\
-    _s = X##_f[_i] << _up;						\
-    X##_f[0] = X##_f[_skip] >> _down | X##_f[_skip+1] << _up | (_s != 0); \
-    for (_i = 1; _i < 4-_skip; ++_i)					\
+    _s |= X##_f[_i] << _up;						\
+/* s is now != 0 if we want to set the LSbit */                         \
+    for (_i = 0; _i < 3-_skip; ++_i)					\
       X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up;	\
-    X##_f[_i] >>= _down;						\
+    X##_f[_i] = X##_f[3] >> _down;					\
     for (++_i; _i < 4; ++_i)						\
       X##_f[_i] = 0;							\
+    /* don't fix the LSB until the very end when we're sure f[0] is stable */ \
+    X##_f[0] |= (_s != 0);                                              \
   } while (0)
 
 #define _FP_FRAC_ADD_4(R,X,Y)						\
@@ -62,6 +100,92 @@
 		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
 		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
 
+#define _FP_FRAC_SUB_4(R,X,Y)                                           \
+  __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],		\
+		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
+		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_ADDI_4(X,I)                                            \
+  __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
+
+#define _FP_ZEROFRAC_4  0,0,0,0
+#define _FP_MINFRAC_4   0,0,0,1
+
+#define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
+#define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
+#define _FP_FRAC_OVERP_4(fs,X)  (X##_f[3] & _FP_OVERFLOW_##fs) /* overflow bit is tested in the high word, cf. _FP_FRAC_HIGH_4 */
+
+#define _FP_FRAC_EQ_4(X,Y)                              \
+ (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]          \
+  && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
+
+#define _FP_FRAC_GT_4(X,Y)                              \
+ (X##_f[3] > Y##_f[3] ||                                \
+  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
+   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
+    (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])       \
+   ))                                                   \
+  ))                                                    \
+ )
+
+#define _FP_FRAC_GE_4(X,Y)                              \
+ (X##_f[3] > Y##_f[3] ||                                \
+  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
+   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
+    (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])      \
+   ))                                                   \
+  ))                                                    \
+ )
+
+/* Set R to the number of leading zero bits of the 4-word fraction X; X##_f[3] is the high word. */
+#define _FP_FRAC_CLZ_4(R,X)             \
+  do {                                  \
+    if (X##_f[3])                       \
+    {                                   \
+        __FP_CLZ(R,X##_f[3]);           \
+    }                                   \
+    else if (X##_f[2])                  \
+    {                                   \
+        __FP_CLZ(R,X##_f[2]);           \
+        R += _FP_W_TYPE_SIZE;           \
+    }                                   \
+    else if (X##_f[1])                  \
+    {                                   \
+        __FP_CLZ(R,X##_f[1]);           \
+        R += _FP_W_TYPE_SIZE*2;         \
+    }                                   \
+    else                                \
+    {                                   \
+        __FP_CLZ(R,X##_f[0]);           \
+        R += _FP_W_TYPE_SIZE*3;         \
+    }                                   \
+  } while(0)
+
+
+#define _FP_UNPACK_RAW_4(fs, X, val)                            \
+  do {                                                          \
+    union _FP_UNION_##fs _flo; _flo.flt = (val);        	\
+    X##_f[0] = _flo.bits.frac0;                                 \
+    X##_f[1] = _flo.bits.frac1;                                 \
+    X##_f[2] = _flo.bits.frac2;                                 \
+    X##_f[3] = _flo.bits.frac3;                                 \
+    X##_e  = _flo.bits.exp;                                     \
+    X##_s  = _flo.bits.sign;                                    \
+  } while (0)
+
+#define _FP_PACK_RAW_4(fs, val, X)                              \
+  do {                                                          \
+    union _FP_UNION_##fs _flo;					\
+    _flo.bits.frac0 = X##_f[0];                                 \
+    _flo.bits.frac1 = X##_f[1];                                 \
+    _flo.bits.frac2 = X##_f[2];                                 \
+    _flo.bits.frac3 = X##_f[3];                                 \
+    _flo.bits.exp   = X##_e;                                    \
+    _flo.bits.sign  = X##_s;                                    \
+    (val) = _flo.flt;                                   	\
+  } while (0)
+
+
 /*
  * Internals 
  */
@@ -76,3 +200,98 @@
    r2 = x2 + y2 + (r1 < x1),						\
    r3 = x3 + y3 + (r2 < x2))
 #endif
+/* r = x - y, word 0 first, borrowing upward. NOTE(review): "(rK > xK)" misses a borrow when yK is all-ones with a borrow coming in, and is always false if r aliases x -- confirm against callers. */
+#ifndef __FP_FRAC_SUB_4
+#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)		\
+  (r0 = x0 - y0,                                                        \
+   r1 = x1 - y1 - (r0 > x0),                                            \
+   r2 = x2 - y2 - (r1 > x1),                                            \
+   r3 = x3 - y3 - (r2 > x2))
+#endif
+
+#ifndef __FP_FRAC_ADDI_4
+/* Add unsigned I to the 4-word fraction x3..x0: x0 wrapped iff x0 < I afterwards, and a carry continues upward only through words that became 0. I is evaluated more than once. */
+#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                                 \
+  (x0 += (i), x1 += (x0 < (i)), x2 += (x0 < (i) && !x1), x3 += (x0 < (i) && !x1 && !x2))
+#endif
+
+/* Convert FP values between word sizes. This appears to be more
+ * complicated than I'd have expected it to be, so these might be
+ * wrong... These macros are in any case somewhat bogus because they
+ * use information about what various FRAC_n variables look like 
+ * internally [eg, that 2 word vars are X_f0 and X_f1]. But so do
+ * the ones in op-2.h and op-1.h. 
+ */
+#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),     \
+                        _FP_WFRACBITS_##sfs);                           \
+     D##_f = S##_f[0];                                                   \
+  } while (0)
+
+#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),     \
+                        _FP_WFRACBITS_##sfs);                           \
+     D##_f0 = S##_f[0];                                                  \
+     D##_f1 = S##_f[1];                                                  \
+  } while (0)
+
+/* Assembly/disassembly for converting to/from integral types.  
+ * No shifting or overflow handled here.
+ */
+/* Put the FP value X into r, which is an integer of size rsize. */
+#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
+  do {                                                                  \
+    if (rsize <= _FP_W_TYPE_SIZE)                                       \
+      r = X##_f[0];                                                     \
+    else if (rsize <= 2*_FP_W_TYPE_SIZE)                                \
+    {                                                                   \
+      r = X##_f[1];                                                     \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[0];                                                    \
+    }                                                                   \
+    else                                                                \
+    {                                                                   \
+      /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
+      /* and int == 4words as a single case.                         */ \
+      r = X##_f[3];                                                     \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[2];                                                    \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[1];                                                    \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[0];                                                    \
+    }                                                                   \
+  } while (0)
+
+/* "No disassemble Number Five!" */
+/* move an integer of size rsize into X's fractional part. We rely on
+ * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
+ * having to mask the values we store into it.
+ */
+#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
+  do {                                                                  \
+    X##_f[0] = r;                                                       \
+    X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);   \
+    X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
+    X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
+  } while (0) /* no trailing ';' here: the caller supplies it, so if/else use stays valid */
+
+#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     D##_f[0] = S##_f;                                                  \
+     D##_f[1] = D##_f[2] = D##_f[3] = 0;                                \
+     _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));    \
+   } while (0)
+
+#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     D##_f[0] = S##_f0;                                                 \
+     D##_f[1] = S##_f1;                                                 \
+     D##_f[2] = D##_f[3] = 0;                                           \
+     _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));    \
+   } while (0)
+
+/* FIXME! This has to be written */
+#define _FP_SQRT_MEAT_4(R, S, T, X, q)
diff --git a/arch/sparc64/math-emu/op-common.h b/arch/sparc64/math-emu/op-common.h
index 8123e4c46..d4ce104f6 100644
--- a/arch/sparc64/math-emu/op-common.h
+++ b/arch/sparc64/math-emu/op-common.h
@@ -1,3 +1,4 @@
+
 #define _FP_DECL(wc, X)			\
   _FP_I_TYPE X##_c, X##_s, X##_e;	\
   _FP_FRAC_DECL_##wc(X)
@@ -507,6 +508,29 @@ do {									\
  * Convert from FP to integer
  */
 
+/* "When a NaN, infinity, large positive argument >= 2147483648.0, or 
+ * large negative argument <= -2147483649.0 is converted to an integer,
+ * the invalid_current bit...should be set and fp_exception_IEEE_754 should
+ * be raised. If the floating point invalid trap is disabled, no trap occurs
+ * and a numerical result is generated: if the sign bit of the operand
+ * is 0, the result is 2147483647; if the sign bit of the operand is 1,
+ * the result is -2147483648."
+ * Similarly for conversion to extended ints, except that the boundaries
+ * are >= 2^63, <= -(2^63 + 1), and the results are 2^63 - 1 for s=0 and
+ * -2^63 for s=1.
+ * -- SPARC Architecture Manual V9, Appendix B, which specifies how
+ * SPARCs resolve implementation dependencies in the IEEE-754 spec.
+ * I don't believe that the code below follows this. I'm not even sure
+ * it's right! 
+ * It doesn't cope with needing to convert to an n bit integer when there
+ * is no n bit integer type. Fortunately gcc provides long long so this
+ * isn't a problem for sparc32.
+ * I have, however, fixed its NaN handling to conform as above.
+ *         -- PMM 02/1998
+ * NB: rsigned is not 'is r declared signed?' but 'should the value stored
+ * in r be signed or unsigned?'. r is always(?) declared unsigned.
+ * Comments below are mine, BTW -- PMM 
+ */
 #define _FP_TO_INT(fs, wc, r, X, rsize, rsigned)				\
   do {										\
     switch (X##_c)								\
@@ -514,13 +538,14 @@ do {									\
       case FP_CLS_NORMAL:							\
 	if (X##_e < 0)								\
 	  {									\
-	  case FP_CLS_NAN:							\
+	  /* case FP_CLS_NAN: see above! */					\
 	  case FP_CLS_ZERO:							\
 	    r = 0;								\
 	  }									\
 	else if (X##_e >= rsize - (rsigned != 0))				\
-	  {									\
-	  case FP_CLS_INF:							\
+	  {	/* overflow */							\
+	  case FP_CLS_NAN:                                                      \
+          case FP_CLS_INF:							\
 	    if (rsigned)							\
 	      {									\
 		r = 1;								\
@@ -604,6 +629,23 @@ do {									\
 /* Count leading zeros in a word.  */
 
 #ifndef __FP_CLZ
+#if _FP_W_TYPE_SIZE < 64
+/* this is just to shut the compiler up about shifts > word length -- PMM 02/1998 */
+#define __FP_CLZ(r, x)				\
+  do {						\
+    _FP_W_TYPE _t = (x);			\
+    r = _FP_W_TYPE_SIZE - 1;			\
+    if (_t > 0xffff) r -= 16;			\
+    if (_t > 0xffff) _t >>= 16;			\
+    if (_t > 0xff) r -= 8;			\
+    if (_t > 0xff) _t >>= 8;			\
+    if (_t & 0xf0) r -= 4;			\
+    if (_t & 0xf0) _t >>= 4;			\
+    if (_t & 0xc) r -= 2;			\
+    if (_t & 0xc) _t >>= 2;			\
+    if (_t & 0x2) r -= 1;			\
+  } while (0)
+#else /* not _FP_W_TYPE_SIZE < 64 */
 #define __FP_CLZ(r, x)				\
   do {						\
     _FP_W_TYPE _t = (x);			\
@@ -620,9 +662,11 @@ do {									\
     if (_t & 0xc) _t >>= 2;			\
     if (_t & 0x2) r -= 1;			\
   } while (0)
-#endif
+#endif /* not _FP_W_TYPE_SIZE < 64 */
+#endif /* ndef __FP_CLZ */
 
 #define _FP_DIV_HELP_imm(q, r, n, d)		\
   do {						\
     q = n / d, r = n % d;			\
   } while (0)
+
diff --git a/arch/sparc64/math-emu/quad.h b/arch/sparc64/math-emu/quad.h
index dfc3b4eea..48fcc798c 100644
--- a/arch/sparc64/math-emu/quad.h
+++ b/arch/sparc64/math-emu/quad.h
@@ -1,12 +1,17 @@
 /*
  * Definitions for IEEE Quad Precision
  */
-
-#if _FP_W_TYPE_SIZE < 64
-#error "Only stud muffins allowed, schmuck."
+#if _FP_W_TYPE_SIZE < 32
+/* It appears to be traditional to abuse 16bitters in these header files... */
+#error "Here's a nickel, kid. Go buy yourself a real computer."
 #endif
 
+#if _FP_W_TYPE_SIZE < 64
+/* This is all terribly experimental and I don't know if it'll work properly -- PMM 02/1998 */
+#define _FP_FRACTBITS_Q         (4*_FP_W_TYPE_SIZE)
+#else
 #define _FP_FRACTBITS_Q		(2*_FP_W_TYPE_SIZE)
+#endif
 
 #define _FP_FRACBITS_Q		113
 #define _FP_FRACXBITS_Q		(_FP_FRACTBITS_Q - _FP_FRACBITS_Q)
@@ -23,6 +28,66 @@
 #define _FP_OVERFLOW_Q		\
 	((_FP_W_TYPE)1 << (_FP_WFRACBITS_Q % _FP_W_TYPE_SIZE))
 
+#if _FP_W_TYPE_SIZE < 64
+
+union _FP_UNION_Q
+{
+   long double flt;
+   struct 
+   {
+#if __BYTE_ORDER == __BIG_ENDIAN
+      unsigned sign : 1;
+      unsigned exp : _FP_EXPBITS_Q;
+      unsigned long frac3 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0)-(_FP_W_TYPE_SIZE * 3);
+      unsigned long frac2 : _FP_W_TYPE_SIZE;
+      unsigned long frac1 : _FP_W_TYPE_SIZE;
+      unsigned long frac0 : _FP_W_TYPE_SIZE;
+#else
+      unsigned long frac0 : _FP_W_TYPE_SIZE;
+      unsigned long frac1 : _FP_W_TYPE_SIZE;
+      unsigned long frac2 : _FP_W_TYPE_SIZE;
+      unsigned long frac3 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0)-(_FP_W_TYPE_SIZE * 3);
+      unsigned exp : _FP_EXPBITS_Q;
+      unsigned sign : 1;
+#endif /* not bigendian */
+   } bits __attribute__((packed));
+};
+
+
+#define FP_DECL_Q(X)		_FP_DECL(4,X)
+#define FP_UNPACK_RAW_Q(X,val)	_FP_UNPACK_RAW_4(Q,X,val)
+#define FP_PACK_RAW_Q(val,X)	_FP_PACK_RAW_4(Q,val,X)
+
+#define FP_UNPACK_Q(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_4(Q,X,val);		\
+    _FP_UNPACK_CANONICAL(Q,4,X);	\
+  } while (0)
+
+#define FP_PACK_Q(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(Q,4,X);		\
+    _FP_PACK_RAW_4(Q,val,X);		\
+  } while (0)
+
+#define FP_NEG_Q(R,X)		_FP_NEG(Q,4,R,X)
+#define FP_ADD_Q(R,X,Y)		_FP_ADD(Q,4,R,X,Y)
+/* single.h and double.h define FP_SUB_t this way too. However, _FP_SUB is
+ * never defined in op-common.h! Fortunately nobody seems to use the FP_SUB_t 
+ * macros: I suggest a combination of FP_NEG and FP_ADD :-> -- PMM 02/1998
+ */
+#define FP_SUB_Q(R,X,Y)		_FP_SUB(Q,4,R,X,Y)
+#define FP_MUL_Q(R,X,Y)		_FP_MUL(Q,4,R,X,Y)
+#define FP_DIV_Q(R,X,Y)		_FP_DIV(Q,4,R,X,Y)
+#define FP_SQRT_Q(R,X)		_FP_SQRT(Q,4,R,X)
+
+#define FP_CMP_Q(r,X,Y,un)	_FP_CMP(Q,4,r,X,Y,un)
+#define FP_CMP_EQ_Q(r,X,Y)	_FP_CMP_EQ(Q,4,r,X,Y)
+
+#define FP_TO_INT_Q(r,X,rsz,rsg)  _FP_TO_INT(Q,4,r,X,rsz,rsg)
+#define FP_FROM_INT_Q(X,r,rs,rt)  _FP_FROM_INT(Q,4,X,r,rs,rt)
+
+#else   /* not _FP_W_TYPE_SIZE < 64 */
 union _FP_UNION_Q
 {
   long double flt /* __attribute__((mode(TF))) */ ;
@@ -69,3 +134,5 @@ union _FP_UNION_Q
 
 #define FP_TO_INT_Q(r,X,rsz,rsg)  _FP_TO_INT(Q,2,r,X,rsz,rsg)
 #define FP_FROM_INT_Q(X,r,rs,rt)  _FP_FROM_INT(Q,2,X,r,rs,rt)
+
+#endif /* not _FP_W_TYPE_SIZE < 64 */
diff --git a/arch/sparc64/math-emu/single.h b/arch/sparc64/math-emu/single.h
index fa7f386cd..f19d99451 100644
--- a/arch/sparc64/math-emu/single.h
+++ b/arch/sparc64/math-emu/single.h
@@ -3,7 +3,7 @@
  */
 
 #if _FP_W_TYPE_SIZE < 32
-#error "Here's a nickle kid.  Go buy yourself a real computer."
+#error "Here's a nickel kid.  Go buy yourself a real computer."
 #endif
 
 #define _FP_FRACBITS_S		24