21 files changed, 767 insertions, 737 deletions
diff --git a/arch/sparc/math-emu/Makefile b/arch/sparc/math-emu/Makefile
index d7642b2e9..b85e48924 100644
--- a/arch/sparc/math-emu/Makefile
+++ b/arch/sparc/math-emu/Makefile
@@ -14,13 +14,13 @@ O_OBJS   := math.o ashldi3.o fabss.o faddd.o faddq.o fadds.o 		\
 		fdtos.o fitoq.o fmovs.o fmuld.o fmulq.o fmuls.o		\
 		fnegs.o fqtod.o fqtoi.o fqtos.o fsmuld.o fsqrtd.o	\
 		fsqrtq.o fsqrts.o fstod.o fstoi.o fstoq.o fsubd.o 	\
-		fsubq.o	fsubs.o udivmodti4.o
+		fsubq.o	fsubs.o
 
 LINKS	 := double.h faddd.c faddq.c fadds.c fdivd.c fdivq.c fdivs.c	\
 		fdtoi.c fitoq.c fmuld.c fmulq.c fmuls.c fqtoi.c		\
 		fsqrtd.c fsqrtq.c fsqrts.c fstoi.c fsubd.c 		\
-		fsubq.c	fsubs.c op-1.h op-2.h op-4.h op-common.h quad.h	\
-		single.h soft-fp.h udivmodti4.c
+		fsubq.c	fsubs.c op-1.h op-2.h op-4.h op-8.h		\
+		op-common.h quad.h single.h soft-fp.h
 
 .S.s:
 	$(CPP) -D__ASSEMBLY__ -ansi $< -o $*.s
diff --git a/arch/sparc/math-emu/fabss.c b/arch/sparc/math-emu/fabss.c
index 5429cc733..7af16b7f9 100644
--- a/arch/sparc/math-emu/fabss.c
+++ b/arch/sparc/math-emu/fabss.c
@@ -1,6 +1,12 @@
+/* $Id: fabss.c,v 1.8 1999/05/28 13:41:33 jj Exp $
+ * arch/sparc/math-emu/fabss.c
+ *
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
 int FABSS(unsigned long *rd, unsigned long *rs2)
 {
-	/* Clear the sign bit (high bit of word 0) */
 	rd[0] = rs2[0] & 0x7fffffffUL;
 	return 0;
 }
diff --git a/arch/sparc/math-emu/fcmpd.c b/arch/sparc/math-emu/fcmpd.c
index 8adb30d88..05a460eee 100644
--- a/arch/sparc/math-emu/fcmpd.c
+++ b/arch/sparc/math-emu/fcmpd.c
@@ -1,18 +1,33 @@
+/* $Id: fcmpd.c,v 1.8 1999/05/28 13:41:36 jj Exp $
+ * arch/sparc/math-emu/fcmpd.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "double.h"
 
 int FCMPD(void *rd, void *rs2, void *rs1)
 {
+	FP_DECL_EX;
 	FP_DECL_D(A); FP_DECL_D(B);
 	long ret;
-	unsigned long *fsr = rd;
+	unsigned long fsr;
 	
-	__FP_UNPACK_D(A, rs1);
-	__FP_UNPACK_D(B, rs2);
-	FP_CMP_D(ret, B, A, 2);
-	if (ret == -1)
-		ret = 2;
-
-	*fsr = (*fsr & ~0xc00) | (ret << 10); 
-	return 0;
+	FP_UNPACK_RAW_DP(A, rs1);
+	FP_UNPACK_RAW_DP(B, rs2);
+	FP_CMP_D(ret, B, A, 3);
+	if (ret == 3 && (FP_ISSIGNAN_D(A) || FP_ISSIGNAN_D(B)))
+		FP_SET_EXCEPTION(FP_EX_INVALID);
+	if (!FP_INHIBIT_RESULTS) {
+		if (ret == -1) ret = 2;
+		fsr = *(long *)rd;
+		fsr &= ~0xc00;
+		fsr |= (ret << 10);
+		*(long *)rd = fsr;
+	}
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fcmped.c b/arch/sparc/math-emu/fcmped.c
index 2033b1dc8..5b634da28 100644
--- a/arch/sparc/math-emu/fcmped.c
+++ b/arch/sparc/math-emu/fcmped.c
@@ -1,18 +1,33 @@
+/* $Id: fcmped.c,v 1.8 1999/05/28 13:41:38 jj Exp $
+ * arch/sparc/math-emu/fcmped.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "double.h"
 
 int FCMPED(void *rd, void *rs2, void *rs1)
 {
+	FP_DECL_EX;
 	FP_DECL_D(A); FP_DECL_D(B);
 	long ret;
-	unsigned long *fsr = rd;
+	unsigned long fsr;
 	
-	__FP_UNPACK_D(A, rs1);
-	__FP_UNPACK_D(B, rs2);
-	FP_CMP_D(ret, B, A, 2);
-	if (ret == -1)
-		ret = 2;
-
-	*fsr = (*fsr & ~0xc00) | (ret << 10); 
-	return 0;
+	FP_UNPACK_RAW_DP(A, rs1);
+	FP_UNPACK_RAW_DP(B, rs2);
+	FP_CMP_D(ret, B, A, 3);
+	if (ret == 3)
+		FP_SET_EXCEPTION(FP_EX_INVALID);
+	if (!FP_INHIBIT_RESULTS) {
+		if (ret == -1) ret = 2;
+		fsr = *(long *)rd;
+		fsr &= ~0xc00;
+		fsr |= (ret << 10);
+		*(long *)rd = fsr;
+	}
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fcmpeq.c b/arch/sparc/math-emu/fcmpeq.c
index de99bf343..eb76019ab 100644
--- a/arch/sparc/math-emu/fcmpeq.c
+++ b/arch/sparc/math-emu/fcmpeq.c
@@ -1,18 +1,33 @@
+/* $Id: fcmpeq.c,v 1.8 1999/05/28 13:41:42 jj Exp $
+ * arch/sparc/math-emu/fcmpeq.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "quad.h"
 
 int FCMPEQ(void *rd, void *rs2, void *rs1)
 {
+	FP_DECL_EX;
 	FP_DECL_Q(A); FP_DECL_Q(B);
 	long ret;
 	unsigned long fsr;
 	
-	__FP_UNPACK_Q(A, rs1);
-	__FP_UNPACK_Q(B, rs2);
+	FP_UNPACK_RAW_QP(A, rs1);
+	FP_UNPACK_RAW_QP(B, rs2);
 	FP_CMP_Q(ret, B, A, 3);
-	if (ret == -1) ret = 2;
-	fsr = *(unsigned long *)rd;
-	fsr &= ~0xc00; fsr |= (ret << 10);
-	*(unsigned long *)rd = fsr;
-	return 0;
+	if (ret == 3)
+		FP_SET_EXCEPTION(FP_EX_INVALID);
+	if (!FP_INHIBIT_RESULTS) {
+		if (ret == -1) ret = 2;
+		fsr = *(long *)rd;
+		fsr &= ~0xc00;
+		fsr |= (ret << 10);
+		*(long *)rd = fsr;
+	}
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fcmpes.c b/arch/sparc/math-emu/fcmpes.c
index a078a1243..365937edc 100644
--- a/arch/sparc/math-emu/fcmpes.c
+++ b/arch/sparc/math-emu/fcmpes.c
@@ -1,18 +1,33 @@
+/* $Id: fcmpes.c,v 1.8 1999/05/28 13:41:45 jj Exp $
+ * arch/sparc/math-emu/fcmpes.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "single.h"
 
 int FCMPES(void *rd, void *rs2, void *rs1)
 {
+	FP_DECL_EX;
 	FP_DECL_S(A); FP_DECL_S(B);
 	long ret;
-	unsigned long *fsr = rd;
+	unsigned long fsr;
 	
-	__FP_UNPACK_S(A, rs1);
-	__FP_UNPACK_S(B, rs2);
-	FP_CMP_S(ret, B, A, 1);
-	if (ret == -1)
-		ret = 2;
-
-	*fsr = (*fsr & ~0xc00) | (ret << 10); 
-	return 0;
+	FP_UNPACK_RAW_SP(A, rs1);
+	FP_UNPACK_RAW_SP(B, rs2);
+	FP_CMP_S(ret, B, A, 3);
+	if (ret == 3)
+		FP_SET_EXCEPTION(FP_EX_INVALID);
+	if (!FP_INHIBIT_RESULTS) {
+		if (ret == -1) ret = 2;
+		fsr = *(long *)rd;
+		fsr &= ~0xc00;
+		fsr |= (ret << 10);
+		*(long *)rd = fsr;
+	}
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fcmpq.c b/arch/sparc/math-emu/fcmpq.c
index f3d1b1233..ffaeb1a1e 100644
--- a/arch/sparc/math-emu/fcmpq.c
+++ b/arch/sparc/math-emu/fcmpq.c
@@ -1,18 +1,33 @@
+/* $Id: fcmpq.c,v 1.8 1999/05/28 13:41:48 jj Exp $
+ * arch/sparc/math-emu/fcmpq.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "quad.h"
 
 int FCMPQ(void *rd, void *rs2, void *rs1)
 {
+	FP_DECL_EX;
 	FP_DECL_Q(A); FP_DECL_Q(B);
 	long ret;
 	unsigned long fsr;
 	
-	__FP_UNPACK_Q(A, rs1);
-	__FP_UNPACK_Q(B, rs2);
+	FP_UNPACK_RAW_QP(A, rs1);
+	FP_UNPACK_RAW_QP(B, rs2);
 	FP_CMP_Q(ret, B, A, 3);
-	if (ret == -1) ret = 2;
-	fsr = *(unsigned long *)rd;
-	fsr &= ~0xc00; fsr |= (ret << 10);
-	*(unsigned long *)rd = fsr;
-	return 0;
+	if (ret == 3 && (FP_ISSIGNAN_Q(A) || FP_ISSIGNAN_Q(B)))
+		FP_SET_EXCEPTION(FP_EX_INVALID);
+	if (!FP_INHIBIT_RESULTS) {
+		if (ret == -1) ret = 2;
+		fsr = *(long *)rd;
+		fsr &= ~0xc00;
+		fsr |= (ret << 10);
+		*(long *)rd = fsr;
+	}
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fcmps.c b/arch/sparc/math-emu/fcmps.c
index 7e273320f..3be1315d7 100644
--- a/arch/sparc/math-emu/fcmps.c
+++ b/arch/sparc/math-emu/fcmps.c
@@ -1,18 +1,33 @@
+/* $Id: fcmps.c,v 1.8 1999/05/28 13:41:51 jj Exp $
+ * arch/sparc/math-emu/fcmps.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "single.h"
 
 int FCMPS(void *rd, void *rs2, void *rs1)
 {
+	FP_DECL_EX;
 	FP_DECL_S(A); FP_DECL_S(B);
 	long ret;
-	unsigned long *fsr = rd;
+	unsigned long fsr;
 	
-	__FP_UNPACK_S(A, rs1);
-	__FP_UNPACK_S(B, rs2);
-	FP_CMP_S(ret, B, A, 1);
-	if (ret == -1)
-		ret = 2;
-
-	*fsr = (*fsr & ~0xc00) | (ret << 10); 
-	return 0;
+	FP_UNPACK_RAW_SP(A, rs1);
+	FP_UNPACK_RAW_SP(B, rs2);
+	FP_CMP_S(ret, B, A, 3);
+	if (ret == 3 && (FP_ISSIGNAN_S(A) || FP_ISSIGNAN_S(B)))
+		FP_SET_EXCEPTION(FP_EX_INVALID);
+	if (!FP_INHIBIT_RESULTS) {
+		if (ret == -1) ret = 2;
+		fsr = *(long *)rd;
+		fsr &= ~0xc00;
+		fsr |= (ret << 10);
+		*(long *)rd = fsr;
+	}
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fdmulq.c b/arch/sparc/math-emu/fdmulq.c
index dd9c7953c..96772d4ef 100644
--- a/arch/sparc/math-emu/fdmulq.c
+++ b/arch/sparc/math-emu/fdmulq.c
@@ -1,15 +1,26 @@
+/* $Id: fdmulq.c,v 1.9 1999/05/28 13:41:56 jj Exp $
+ * arch/sparc/math-emu/fdmulq.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "quad.h"
 #include "double.h"
 
 int FDMULQ(void *rd, void *rs2, void *rs1)
 {
+	FP_DECL_EX;
 	FP_DECL_D(IN); FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(R);
 
-	__FP_UNPACK_D(IN, rs1);
+	FP_UNPACK_DP(IN, rs1);
 	FP_CONV(Q,D,4,2,A,IN);
-	__FP_UNPACK_D(IN, rs2);
+	FP_UNPACK_DP(IN, rs2);
 	FP_CONV(Q,D,4,2,B,IN);
 	FP_MUL_Q(R, A, B);
-	return __FP_PACK_Q(rd, R);
+	FP_PACK_QP(rd, R);
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fdtoq.c b/arch/sparc/math-emu/fdtoq.c
index 7b7746821..701b1ff3f 100644
--- a/arch/sparc/math-emu/fdtoq.c
+++ b/arch/sparc/math-emu/fdtoq.c
@@ -1,12 +1,23 @@
+/* $Id: fdtoq.c,v 1.9 1999/05/28 13:42:01 jj Exp $
+ * arch/sparc/math-emu/fdtoq.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "quad.h"
 #include "double.h"
 
 int FDTOQ(void *rd, void *rs2)
 {
+	FP_DECL_EX;
 	FP_DECL_D(A); FP_DECL_Q(R);
 
-	__FP_UNPACK_D(A, rs2);
+	FP_UNPACK_DP(A, rs2);
 	FP_CONV(Q,D,4,2,R,A);
-	return __FP_PACK_Q(rd, R);
+	FP_PACK_QP(rd, R);
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fdtos.c b/arch/sparc/math-emu/fdtos.c
index 612434c40..63951befa 100644
--- a/arch/sparc/math-emu/fdtos.c
+++ b/arch/sparc/math-emu/fdtos.c
@@ -1,12 +1,23 @@
+/* $Id: fdtos.c,v 1.9 1999/05/28 13:42:03 jj Exp $
+ * arch/sparc/math-emu/fdtos.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "double.h"
 #include "single.h"
 
 int FDTOS(void *rd, void *rs2)
 {
+	FP_DECL_EX;
 	FP_DECL_D(A); FP_DECL_S(R);
 
-	__FP_UNPACK_D(A, rs2);
+	FP_UNPACK_DP(A, rs2);
 	FP_CONV(S,D,1,2,R,A);
-	return __FP_PACK_S(rd, R);
+	FP_PACK_SP(rd, R);
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fmovs.c b/arch/sparc/math-emu/fmovs.c
index f113c0bb1..77087a524 100644
--- a/arch/sparc/math-emu/fmovs.c
+++ b/arch/sparc/math-emu/fmovs.c
@@ -1,3 +1,10 @@
+/* $Id: fmovs.c,v 1.7 1999/05/28 13:42:05 jj Exp $
+ * arch/sparc/math-emu/fmovs.c
+ *
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
 int FMOVS(unsigned long *rd, unsigned long *rs2)
 {
 	rd[0] = rs2[0];
diff --git a/arch/sparc/math-emu/fnegs.c b/arch/sparc/math-emu/fnegs.c
index 26a90d778..da2d36ef5 100644
--- a/arch/sparc/math-emu/fnegs.c
+++ b/arch/sparc/math-emu/fnegs.c
@@ -1,3 +1,10 @@
+/* $Id: fnegs.c,v 1.9 1999/05/28 13:42:06 jj Exp $
+ * arch/sparc/math-emu/fnegs.c
+ *
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
 int FNEGS(unsigned long *rd, unsigned long *rs2)
 {
  	/* just change the sign bit */
diff --git a/arch/sparc/math-emu/fqtod.c b/arch/sparc/math-emu/fqtod.c
index 62a437e31..c8aa8edf1 100644
--- a/arch/sparc/math-emu/fqtod.c
+++ b/arch/sparc/math-emu/fqtod.c
@@ -1,12 +1,23 @@
+/* $Id: fqtod.c,v 1.9 1999/05/28 13:42:08 jj Exp $
+ * arch/sparc/math-emu/fqtod.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "quad.h"
 #include "double.h"
 
 int FQTOD(void *rd, void *rs2)
 {
+	FP_DECL_EX;
 	FP_DECL_Q(A); FP_DECL_D(R);
 
-	__FP_UNPACK_Q(A, rs2);
+	FP_UNPACK_QP(A, rs2);
 	FP_CONV(D,Q,2,4,R,A);
-	return __FP_PACK_D(rd, R);
+	FP_PACK_DP(rd, R);
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fqtos.c b/arch/sparc/math-emu/fqtos.c
index 2520affbf..142dfa9a8 100644
--- a/arch/sparc/math-emu/fqtos.c
+++ b/arch/sparc/math-emu/fqtos.c
@@ -1,12 +1,23 @@
+/* $Id: fqtos.c,v 1.9 1999/05/28 13:42:10 jj Exp $
+ * arch/sparc/math-emu/fqtos.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "quad.h"
 #include "single.h"
 
 int FQTOS(void *rd, void *rs2)
 {
+	FP_DECL_EX;
 	FP_DECL_Q(A); FP_DECL_S(R);
 
-	__FP_UNPACK_Q(A, rs2);
+	FP_UNPACK_QP(A, rs2);
 	FP_CONV(S,Q,1,4,R,A);
-	return __FP_PACK_S(rd, R);
+	FP_PACK_SP(rd, R);
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fsmuld.c b/arch/sparc/math-emu/fsmuld.c
index b7b992818..2f873e501 100644
--- a/arch/sparc/math-emu/fsmuld.c
+++ b/arch/sparc/math-emu/fsmuld.c
@@ -1,15 +1,26 @@
+/* $Id: fsmuld.c,v 1.9 1999/05/28 13:42:12 jj Exp $
+ * arch/sparc/math-emu/fsmuld.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "double.h"
 #include "single.h"
 
 int FSMULD(void *rd, void *rs2, void *rs1)
 {
+	FP_DECL_EX;
 	FP_DECL_S(IN); FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(R);
 
-	__FP_UNPACK_S(IN, rs1);
+	FP_UNPACK_SP(IN, rs1);
 	FP_CONV(D,S,2,1,A,IN);
-	__FP_UNPACK_S(IN, rs2);
+	FP_UNPACK_SP(IN, rs2);
 	FP_CONV(D,S,2,1,B,IN);
 	FP_MUL_D(R, A, B);
-	return __FP_PACK_D(rd, R);
+	FP_PACK_DP(rd, R);
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fstod.c b/arch/sparc/math-emu/fstod.c
index ea73660d8..a6206bf20 100644
--- a/arch/sparc/math-emu/fstod.c
+++ b/arch/sparc/math-emu/fstod.c
@@ -1,12 +1,23 @@
+/* $Id: fstod.c,v 1.9 1999/05/28 13:42:14 jj Exp $
+ * arch/sparc/math-emu/fstod.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "double.h"
 #include "single.h"
 
 int FSTOD(void *rd, void *rs2)
 {
+	FP_DECL_EX;
 	FP_DECL_S(A); FP_DECL_D(R);
 
-	__FP_UNPACK_S(A, rs2);
+	FP_UNPACK_SP(A, rs2);
 	FP_CONV(D,S,2,1,R,A);
-	return __FP_PACK_D(rd, R);
+	FP_PACK_DP(rd, R);
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/fstoq.c b/arch/sparc/math-emu/fstoq.c
index 7d201310c..e2257c214 100644
--- a/arch/sparc/math-emu/fstoq.c
+++ b/arch/sparc/math-emu/fstoq.c
@@ -1,12 +1,23 @@
+/* $Id: fstoq.c,v 1.9 1999/05/28 13:42:16 jj Exp $
+ * arch/sparc/math-emu/fstoq.c
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ *
+ */
+
+#include "sfp-util.h"
 #include "soft-fp.h"
 #include "quad.h"
 #include "single.h"
 
 int FSTOQ(void *rd, void *rs2)
 {
+	FP_DECL_EX;
 	FP_DECL_S(A); FP_DECL_Q(R);
 
-	__FP_UNPACK_S(A, rs2);
+	FP_UNPACK_SP(A, rs2);
 	FP_CONV(Q,S,4,1,R,A);
-	return __FP_PACK_Q(rd, R);
+	FP_PACK_QP(rd, R);
+	FP_HANDLE_EXCEPTIONS;
 }
diff --git a/arch/sparc/math-emu/math.c b/arch/sparc/math-emu/math.c
index 68ccb932a..15690b21e 100644
--- a/arch/sparc/math-emu/math.c
+++ b/arch/sparc/math-emu/math.c
@@ -1,26 +1,19 @@
-/* 
+/*
  * arch/sparc/math-emu/math.c
  *
  * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
- * Based on the sparc64 code by Jakub Jelinek.
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1999 David S. Miller (davem@redhat.com)
  *
  * This is a good place to start if you're trying to understand the
- * emulation code, because it's pretty simple. What we do is 
+ * emulation code, because it's pretty simple. What we do is
  * essentially analyse the instruction to work out what the operation
  * is and which registers are involved. We then execute the appropriate
  * FXXXX function. [The floating point queue introduces a minor wrinkle;
  * see below...]
  * The fxxxxx.c files each emulate a single insn. They look relatively
  * simple because the complexity is hidden away in an unholy tangle
- * of preprocessor macros. 
- *
- * WARNING : don't look at the macro definitions unless you 
- * absolutely have to! They're extremely ugly, rather complicated
- * and a single line in an fxxxx.c file can expand to the equivalent 
- * of  30 lines or more of C. Of course, any error in those 30 lines 
- * is reported by the compiler as an error in the single line with the
- * macro usage...
- * Question: should we replace them with inline functions?
+ * of preprocessor macros.
  *
  * The first layer of macros is single.h, double.h, quad.h. Generally
  * these files define macros for working with floating point numbers
@@ -29,11 +22,11 @@
  * generic macros (in this case _FP_ADD(D,2,R,X,Y) where the number
  * of machine words required to store the given IEEE format is passed
  * as a parameter. [double.h and co check the number of bits in a word
- * and define FP_ADD_D & co appropriately]. 
+ * and define FP_ADD_D & co appropriately].
  * The generic macros are defined in op-common.h. This is where all
  * the grotty stuff like handling NaNs is coded. To handle the possible
  * word sizes macros in op-common.h use macros like _FP_FRAC_SLL_##wc()
- * where wc is the 'number of machine words' parameter (here 2). 
+ * where wc is the 'number of machine words' parameter (here 2).
  * These are defined in the third layer of macros: op-1.h, op-2.h
  * and op-4.h. These handle operations on floating point numbers composed
  * of 1,2 and 4 machine words respectively. [For example, on sparc64
@@ -41,7 +34,7 @@
  * constructs in op-1.h, but on sparc32 they use op-2.h definitions.]
  * soft-fp.h is on the same level as op-common.h, and defines some
  * macros which are independent of both word size and FP format.
- * Finally, sfp-machine.h is the machine dependent part of the 
+ * Finally, sfp-machine.h is the machine dependent part of the
  * code: it defines the word size and what type a word is. It also
  * defines how _FP_MUL_MEAT_t() maps to _FP_MUL_MEAT_n_* : op-n.h
  * provide several possible flavours of multiply algorithm, most
@@ -64,59 +57,11 @@
  * so we follow that practice...
  */
 
-/* WISHLIST:
- *
- * + Replace all the macros with inline functions. These should
- * have the same effect but be much easier to work with.
- *
- * + Emulate the IEEE exception flags. We don't currently do this
- * because a) it would require significant alterations to
- * the emulation macros [see the comments about _FP_NEG()
- * in op-common.c and note that we'd need to invent a convention
- * for passing in the flags to FXXXX fns and returning them] and 
- * b) SPARClinux doesn't let users access the flags anyway 
- * [contrast Solaris, which allows you to examine, clear or set
- * the flags, and request that exceptions cause SIGFPE 
- * [which you then set up a signal handler for, obviously...]].
- * Erm, (b) may quite possibly be garbage. %fsr is user-writable
- * so you don't need a syscall. There may or may not be library
- * support.
- *
- * + Emulation of FMULQ, FDIVQ, FSQRTQ, FDMULQ needs to be 
- * written!
- * 
- * + reindent code to conform to Linux kernel standard :->
- *
- * + work out whether all the compile-time warnings are bogus
- *
- * + check that conversion to/from integers works
- * 
- * + check with the SPARC architecture manual to see if we resolve
- * the implementation-dependent bits of the IEEE spec in the
- * same manner as the hardware.
- *
- * + more test cases for the test script always welcome!
- *
- * + illegal opcodes currently cause SIGFPEs. We should arrange
- * to tell the traps.c code to SIGILL instead. Currently,
- * everywhere that we return 0 should cause SIGILL, I think.
- * SIGFPE should only be caused if we set an IEEE exception bit
- * and the relevant trap bit is also set. (this means that 
- * traps.c should do this; also it should handle the case of
- * IEEE exception generated directly by the hardware.)
- * Should illegal_fp_register (which is a flavour of fp exception)
- * cause SIGFPE or  SIGILL?
- *
- * + the test script needs to be extended to handle the quadword
- * and comparison insns.
- *
- * + _FP_DIV_MEAT_2_udiv_64() appears to work but it should be
- * checked by somebody who understands the algorithm :->
- * 
- * + fpsave() saves the FP queue but fpload() doesn't reload it.
+/* TODO:
+ * fpsave() saves the FP queue but fpload() doesn't reload it.
  * Therefore when we context switch or change FPU ownership
  * we have to check to see if the queue had anything in it and
- * emulate it if it did. This is going to be a pain. 
+ * emulate it if it did. This is going to be a pain.
  */
 
 #include <linux/types.h>
@@ -124,38 +69,21 @@
 #include <linux/mm.h>
 #include <asm/uaccess.h>
 
+#include "sfp-util.h"
 #include "soft-fp.h"
 
 #define FLOATFUNC(x) extern int x(void *,void *,void *)
 
-/* Current status: we don't properly emulate the difficult quadword
- * insns (MUL, DIV, SQRT).
- * There are also some ops involving the FP registers which we don't
- * emulate: the branch on FP condition flags and the load/store to
- * FP regs or FSR. I'm assuming that these will never generate traps
- * (not unreasonable if there's an FPU at all; comments in the NetBSD
- * kernel source agree on this point). If we wanted to allow
- * purely software-emulation of the FPU with FPU totally disabled
- * or non-existent, we'd have to emulate these as well. We'd also
- * need to alter the fp_disabled trap handler to call the math-emu
- * code appropriately. The structure of do_one_mathemu() is also
- * inappropriate for these ops (as it has no way to alter the pc, 
- * for a start) and it might be better to special-case them in do_mathemu().
- * Oh, and you'd need to alter the traps.c code so it didn't try to
- * fpsave() and fpload(). If there's genuinely no FPU then there's 
- * probably bits of kernel stuff that just won't work anyway...
- */
-
 /* The Vn labels indicate what version of the SPARC architecture gas thinks
- * each insn is. This is from the binutils source :-> 
+ * each insn is. This is from the binutils source :->
  */
 /* quadword instructions */
-FLOATFUNC(FSQRTQ);                                /* v8 NYI */
+FLOATFUNC(FSQRTQ);                                /* v8 */
 FLOATFUNC(FADDQ);                                 /* v8 */
 FLOATFUNC(FSUBQ);                                 /* v8 */
-FLOATFUNC(FMULQ);                                 /* v8 NYI */
-FLOATFUNC(FDIVQ);                                 /* v8 NYI */
-FLOATFUNC(FDMULQ);                                /* v8 NYI */
+FLOATFUNC(FMULQ);                                 /* v8 */
+FLOATFUNC(FDIVQ);                                 /* v8 */
+FLOATFUNC(FDMULQ);                                /* v8 */
 FLOATFUNC(FQTOS);                                 /* v8 */
 FLOATFUNC(FQTOD);                                 /* v8 */
 FLOATFUNC(FITOQ);                                 /* v8 */
@@ -197,7 +125,7 @@ FLOATFUNC(FITOD);                                 /* v6 */
 #define FSR_CEXC_SHIFT	0UL
 #define FSR_CEXC_MASK	(0x1fUL << FSR_CEXC_SHIFT)
 
-static int do_one_mathemu(u32 insn, unsigned long *fsr, unsigned long *fregs);   
+static int do_one_mathemu(u32 insn, unsigned long *fsr, unsigned long *fregs);
 
 /* Unlike the Sparc64 version (which has a struct fpustate), we
  * pass the taskstruct corresponding to the task which currently owns the
@@ -210,65 +138,65 @@ static int do_one_mathemu(u32 insn, unsigned long *fsr, unsigned long *fregs);
  */
 int do_mathemu(struct pt_regs *regs, struct task_struct *fpt)
 {
-   /* regs->pc isn't necessarily the PC at which the offending insn is sitting.
-    * The FPU maintains a queue of FPops which cause traps. 
-    * When it hits an instruction that requires that the trapped op succeeded
-    * (usually because it reads a reg. that the trapped op wrote) then it
-    * causes this exception. We need to emulate all the insns on the queue
-    * and then allow the op to proceed.
-    * This code should also handle the case where the trap was precise,
-    * in which case the queue length is zero and regs->pc points at the 
-    * single FPop to be emulated. (this case is untested, though :->) 
-    * You'll need this case if you want to be able to emulate all FPops
-    * because the FPU either doesn't exist or has been software-disabled.
-    * [The UltraSPARC makes FP a precise trap; this isn't as stupid as it 
-    * might sound because the Ultra does funky things with a superscalar
-    * architecture.]
-    */
-   
-   /* You wouldn't believe how often I typed 'ftp' when I meant 'fpt' :-> */
+	/* regs->pc isn't necessarily the PC at which the offending insn is sitting.
+	 * The FPU maintains a queue of FPops which cause traps.
+	 * When it hits an instruction that requires that the trapped op succeeded
+	 * (usually because it reads a reg. that the trapped op wrote) then it
+	 * causes this exception. We need to emulate all the insns on the queue
+	 * and then allow the op to proceed.
+	 * This code should also handle the case where the trap was precise,
+	 * in which case the queue length is zero and regs->pc points at the
+	 * single FPop to be emulated. (this case is untested, though :->)
+	 * You'll need this case if you want to be able to emulate all FPops
+	 * because the FPU either doesn't exist or has been software-disabled.
+	 * [The UltraSPARC makes FP a precise trap; this isn't as stupid as it
+	 * might sound because the Ultra does funky things with a superscalar
+	 * architecture.]
+	 */
+
+	/* You wouldn't believe how often I typed 'ftp' when I meant 'fpt' :-> */
 
-   int i;
-   int retcode = 0;                               /* assume all succeed */
-   unsigned long insn;
-   
-#ifdef DEBUG_MATHEMU   
-   printk("In do_mathemu()... pc is %08lx\n", regs->pc);
-   printk("fpqdepth is %ld\n",fpt->tss.fpqdepth);
-   for (i = 0; i < fpt->tss.fpqdepth; i++)
-      printk("%d: %08lx at %08lx\n",i,fpt->tss.fpqueue[i].insn, (unsigned long)fpt->tss.fpqueue[i].insn_addr);
-#endif      
+	int i;
+	int retcode = 0;                               /* assume all succeed */
+	unsigned long insn;
 
-   if (fpt->tss.fpqdepth == 0) {                   /* no queue, guilty insn is at regs->pc */
-#ifdef DEBUG_MATHEMU   
-      printk("precise trap at %08lx\n", regs->pc);
+#ifdef DEBUG_MATHEMU
+	printk("In do_mathemu()... pc is %08lx\n", regs->pc);
+	printk("fpqdepth is %ld\n", fpt->tss.fpqdepth);
+	for (i = 0; i < fpt->tss.fpqdepth; i++)
+		printk("%d: %08lx at %08lx\n", i, fpt->tss.fpqueue[i].insn,
+		       (unsigned long)fpt->tss.fpqueue[i].insn_addr);
 #endif
-      if (!get_user(insn, (u32 *)regs->pc)) {
-         retcode = do_one_mathemu(insn, &fpt->tss.fsr, fpt->tss.float_regs);
-         if (retcode) {
-            /* in this case we need to fix up PC & nPC */
-            regs->pc = regs->npc;
-            regs->npc += 4;
-         }
-      }
-      return retcode;
-   }
 
-   /* Normal case: need to empty the queue... */
-   for (i = 0; i < fpt->tss.fpqdepth; i++)
-   {
-      retcode = do_one_mathemu(fpt->tss.fpqueue[i].insn, &(fpt->tss.fsr), fpt->tss.float_regs);
-      if (!retcode)                               /* insn failed, no point doing any more */
-         break;
-   }
-   /* Now empty the queue and clear the queue_not_empty flag */
-   if(retcode)
-	   fpt->tss.fsr &= ~(0x3000 | FSR_CEXC_MASK);
-   else
-	   fpt->tss.fsr &= ~0x3000;
-   fpt->tss.fpqdepth = 0;
-   
-   return retcode;
+	if (fpt->tss.fpqdepth == 0) {                   /* no queue, guilty insn is at regs->pc */
+#ifdef DEBUG_MATHEMU
+		printk("precise trap at %08lx\n", regs->pc);
+#endif
+		if (!get_user(insn, (u32 *)regs->pc)) {
+			retcode = do_one_mathemu(insn, &fpt->tss.fsr, fpt->tss.float_regs);
+			if (retcode) {
+				/* in this case we need to fix up PC & nPC */
+				regs->pc = regs->npc;
+				regs->npc += 4;
+			}
+		}
+		return retcode;
+	}
+
+	/* Normal case: need to empty the queue... */
+	for (i = 0; i < fpt->tss.fpqdepth; i++) {
+		retcode = do_one_mathemu(fpt->tss.fpqueue[i].insn, &(fpt->tss.fsr), fpt->tss.float_regs);
+		if (!retcode)                               /* insn failed, no point doing any more */
+			break;
+	}
+	/* Now empty the queue and clear the queue_not_empty flag */
+	if(retcode)
+		fpt->tss.fsr &= ~(0x3000 | FSR_CEXC_MASK);
+	else
+		fpt->tss.fsr &= ~0x3000;
+	fpt->tss.fpqdepth = 0;
+
+	return retcode;
 }
 
 /* All routines returning an exception to raise should detect
@@ -291,46 +219,36 @@ static int record_exception(unsigned long *pfsr, int eflag)
 	if(would_trap != 0) {
 		eflag &= ((fsr & FSR_TEM_MASK) >> FSR_TEM_SHIFT);
 		if((eflag & (eflag - 1)) != 0) {
-			if(eflag & EFLAG_INVALID)
-				eflag = EFLAG_INVALID;
-			else if(eflag & EFLAG_DIVZERO)
-				eflag = EFLAG_DIVZERO;
-			else if(eflag & EFLAG_INEXACT)
-				eflag = EFLAG_INEXACT;
+			if(eflag & FP_EX_INVALID)
+				eflag = FP_EX_INVALID;
+			else if(eflag & FP_EX_OVERFLOW)
+				eflag = FP_EX_OVERFLOW;
+			else if(eflag & FP_EX_UNDERFLOW)
+				eflag = FP_EX_UNDERFLOW;
+			else if(eflag & FP_EX_DIVZERO)
+				eflag = FP_EX_DIVZERO;
+			else if(eflag & FP_EX_INEXACT)
+				eflag = FP_EX_INEXACT;
 		}
 	}
 
-	/* Set CEXC, here are the rules:
+	/* Set CEXC, here is the rule:
 	 *
-	 * 1) In general all FPU ops will set one and only one
+	 *    In general all FPU ops will set one and only one
 	 *    bit in the CEXC field, this is always the case
 	 *    when the IEEE exception trap is enabled in TEM.
-	 *
-	 * 2) As a special case, if an overflow or underflow
-	 *    is being signalled, AND the trap is not enabled
-	 *    in TEM, then the inexact field shall also be set.
 	 */
 	fsr &= ~(FSR_CEXC_MASK);
-	if(would_trap ||
-	   (eflag & (EFLAG_OVERFLOW | EFLAG_UNDERFLOW)) == 0) {
-		fsr |= ((long)eflag << FSR_CEXC_SHIFT);
-	} else {
-		fsr |= (((long)eflag << FSR_CEXC_SHIFT) |
-			(EFLAG_INEXACT << FSR_CEXC_SHIFT));
-	}
+	fsr |= ((long)eflag << FSR_CEXC_SHIFT);
 
-	/* Set the AEXC field, rules are:
+	/* Set the AEXC field, rule is:
 	 *
-	 * 1) If a trap would not be generated, the
+	 *    If a trap would not be generated, the
 	 *    CEXC just generated is OR'd into the
 	 *    existing value of AEXC.
-	 *
-	 * 2) When a trap is generated, AEXC is cleared.
 	 */
 	if(would_trap == 0)
 		fsr |= ((long)eflag << FSR_AEXC_SHIFT);
-	else
-		fsr &= ~(FSR_AEXC_MASK);
 
 	/* If trapping, indicate fault trap type IEEE. */
 	if(would_trap != 0)
@@ -343,157 +261,150 @@ static int record_exception(unsigned long *pfsr, int eflag)
 
 static int do_one_mathemu(u32 insn, unsigned long *fsr, unsigned long *fregs)
 {
-   /* Emulate the given insn, updating fsr and fregs appropriately. */
-   int type = 0; 
-   /* 01 is single, 10 is double, 11 is quad, 
-    * 000011 is rs1, 001100 is rs2, 110000 is rd (00 in rd is fcc)
-    * 111100000000 tells which ftt that may happen in 
-    * (this field not used on sparc32 code, as we can't 
-    * extract trap type info for ops on the FP queue) 
-    */
-   int freg, eflag;
-   int (*func)(void *,void *,void *) = NULL;
-   void *rs1 = NULL, *rs2 = NULL, *rd = NULL;   
+	/* Emulate the given insn, updating fsr and fregs appropriately. */
+	int type = 0;
+	/* 01 is single, 10 is double, 11 is quad,
+	 * 000011 is rs1, 001100 is rs2, 110000 is rd (00 in rd is fcc)
+	 * 111100000000 tells which ftt may be raised
+	 * (this field not used on sparc32 code, as we can't
+	 * extract trap type info for ops on the FP queue)
+	 */
+	int freg, eflag;
+	int (*func)(void *,void *,void *) = NULL;
+	void *rs1 = NULL, *rs2 = NULL, *rd = NULL;
+
+#ifdef DEBUG_MATHEMU
+	printk("In do_mathemu(), emulating %08lx\n", insn);
+#endif
 
+	if ((insn & 0xc1f80000) == 0x81a00000)	/* FPOP1 */ {
+		switch ((insn >> 5) & 0x1ff) {
+		/* QUAD - ftt == 3 */
+		case 0x001: type = 0x314; func = FMOVS; break;
+		case 0x005: type = 0x314; func = FNEGS; break;
+		case 0x009: type = 0x314; func = FABSS; break;
+		case 0x02b: type = 0x33c; func = FSQRTQ; break;
+		case 0x043: type = 0x33f; func = FADDQ; break;
+		case 0x047: type = 0x33f; func = FSUBQ; break;
+		case 0x04b: type = 0x33f; func = FMULQ; break;
+		case 0x04f: type = 0x33f; func = FDIVQ; break;
+		case 0x06e: type = 0x33a; func = FDMULQ; break;
+		case 0x0c7: type = 0x31c; func = FQTOS; break;
+		case 0x0cb: type = 0x32c; func = FQTOD; break;
+		case 0x0cc: type = 0x334; func = FITOQ; break;
+		case 0x0cd: type = 0x334; func = FSTOQ; break;
+		case 0x0ce: type = 0x338; func = FDTOQ; break;
+		case 0x0d3: type = 0x31c; func = FQTOI; break;
+		/* SUBNORMAL - ftt == 2 */
+		case 0x029: type = 0x214; func = FSQRTS; break;
+		case 0x02a: type = 0x228; func = FSQRTD; break;
+		case 0x041: type = 0x215; func = FADDS; break;
+		case 0x042: type = 0x22a; func = FADDD; break;
+		case 0x045: type = 0x215; func = FSUBS; break;
+		case 0x046: type = 0x22a; func = FSUBD; break;
+		case 0x049: type = 0x215; func = FMULS; break;
+		case 0x04a: type = 0x22a; func = FMULD; break;
+		case 0x04d: type = 0x215; func = FDIVS; break;
+		case 0x04e: type = 0x22a; func = FDIVD; break;
+		case 0x069: type = 0x225; func = FSMULD; break;
+		case 0x0c6: type = 0x218; func = FDTOS; break;
+		case 0x0c9: type = 0x224; func = FSTOD; break;
+		case 0x0d1: type = 0x214; func = FSTOI; break;
+		case 0x0d2: type = 0x218; func = FDTOI; break;
+		default:
+#ifdef DEBUG_MATHEMU
+			printk("unknown FPop1: %03lx\n",(insn>>5)&0x1ff);
+#endif
+		}
+	} else if ((insn & 0xc1f80000) == 0x81a80000)	/* FPOP2 */ {
+		switch ((insn >> 5) & 0x1ff) {
+		case 0x051: type = 0x305; func = FCMPS; break;
+		case 0x052: type = 0x30a; func = FCMPD; break;
+		case 0x053: type = 0x30f; func = FCMPQ; break;
+		case 0x055: type = 0x305; func = FCMPES; break;
+		case 0x056: type = 0x30a; func = FCMPED; break;
+		case 0x057: type = 0x30f; func = FCMPEQ; break;
+		default:
 #ifdef DEBUG_MATHEMU
-   printk("In do_mathemu(), emulating %08lx\n", insn);
-#endif   
-      
-   if ((insn & 0xc1f80000) == 0x81a00000) /* FPOP1 */ {
-      switch ((insn >> 5) & 0x1ff) {
-         /* QUAD - ftt == 3 */
-         case 0x001: type = 0x314; func = FMOVS; break;
-         case 0x005: type = 0x314; func = FNEGS; break;
-         case 0x009: type = 0x314; func = FABSS; break;
-         case 0x02b: type = 0x33c; func = FSQRTQ; break;
-         case 0x043: type = 0x33f; func = FADDQ; break;
-         case 0x047: type = 0x33f; func = FSUBQ; break;
-         case 0x04b: type = 0x33f; func = FMULQ; break;
-         case 0x04f: type = 0x33f; func = FDIVQ; break;
-         case 0x06e: type = 0x33a; func = FDMULQ; break;
-         case 0x0c7: type = 0x31c; func = FQTOS; break;
-         case 0x0cb: type = 0x32c; func = FQTOD; break;
-         case 0x0cc: type = 0x334; func = FITOQ; break;
-         case 0x0cd: type = 0x334; func = FSTOQ; break;
-         case 0x0ce: type = 0x338; func = FDTOQ; break;
-         case 0x0d3: type = 0x31c; func = FQTOI; break;
-            /* SUBNORMAL - ftt == 2 */
-         case 0x029: type = 0x214; func = FSQRTS; break;
-         case 0x02a: type = 0x228; func = FSQRTD; break;
-         case 0x041: type = 0x215; func = FADDS; break;
-         case 0x042: type = 0x22a; func = FADDD; break;
-         case 0x045: type = 0x215; func = FSUBS; break;
-         case 0x046: type = 0x22a; func = FSUBD; break;
-         case 0x049: type = 0x215; func = FMULS; break;
-         case 0x04a: type = 0x22a; func = FMULD; break;
-         case 0x04d: type = 0x215; func = FDIVS; break;
-         case 0x04e: type = 0x22a; func = FDIVD; break;
-         case 0x069: type = 0x225; func = FSMULD; break;
-         case 0x0c6: type = 0x218; func = FDTOS; break;
-         case 0x0c9: type = 0x224; func = FSTOD; break;
-         case 0x0d1: type = 0x214; func = FSTOI; break;
-         case 0x0d2: type = 0x218; func = FDTOI; break;
-         default: 
-#ifdef DEBUG_MATHEMU         
-         	printk("unknown FPop1: %03lx\n",(insn>>5)&0x1ff);
-#endif         
-      }
-   }
-   else if ((insn & 0xc1f80000) == 0x81a80000) /* FPOP2 */ {
-      switch ((insn >> 5) & 0x1ff) {
-         case 0x051: type = 0x305; func = FCMPS; break;
-         case 0x052: type = 0x30a; func = FCMPD; break;
-         case 0x053: type = 0x30f; func = FCMPQ; break;
-         case 0x055: type = 0x305; func = FCMPES; break;
-         case 0x056: type = 0x30a; func = FCMPED; break;
-         case 0x057: type = 0x30f; func = FCMPEQ; break;
-         default: 
-#ifdef DEBUG_MATHEMU         
-         	printk("unknown FPop2: %03lx\n",(insn>>5)&0x1ff);
-#endif         	
-      }
-   }
-   
-   if (!type) { /* oops, didn't recognise that FPop */
-      printk("attempt to emulate unrecognised FPop!\n");
-      return 0;
-   }
-   
-   /* Decode the registers to be used */
-   freg = (*fsr >> 14) & 0xf;
+			printk("unknown FPop2: %03lx\n",(insn>>5)&0x1ff);
+#endif
+		}
+	}
+
+	if (!type) {	/* oops, didn't recognise that FPop */
+		printk("attempt to emulate unrecognised FPop!\n");
+		return 0;
+	}
+
+	/* Decode the registers to be used */
+	freg = (*fsr >> 14) & 0xf;
 
-   *fsr &= ~0x1c000;                              /* clear the traptype bits */
-    
-   freg = ((insn >> 14) & 0x1f);
-   switch (type & 0x3)                            /* is rs1 single, double or quad? */
-   {
-      case 3:
-         if (freg & 3)                            /* quadwords must have bits 4&5 of the */
-         {                                        /* encoded reg. number set to zero. */
-            *fsr |= (6 << 14);                  
-            return 0;                             /* simulate invalid_fp_register exception */
-         }
-         /* fall through */
-      case 2:
-         if (freg & 1)                            /* doublewords must have bit 5 zeroed */
-         {
-            *fsr |= (6 << 14);
-            return 0;
-         }
-   }
-   rs1 = (void *)&fregs[freg];
-   freg = (insn & 0x1f);
-   switch ((type >> 2) & 0x3)
-   {                                              /* same again for rs2 */
-      case 3:
-         if (freg & 3)                            /* quadwords must have bits 4&5 of the */
-         {                                        /* encoded reg. number set to zero. */
-            *fsr |= (6 << 14);                  
-            return 0;                             /* simulate invalid_fp_register exception */
-         }
-         /* fall through */
-      case 2:
-         if (freg & 1)                            /* doublewords must have bit 5 zeroed */
-         {
-            *fsr |= (6 << 14);
-            return 0;
-         }
-   }
-   rs2 = (void *)&fregs[freg];
-   freg = ((insn >> 25) & 0x1f);
-   switch ((type >> 4) & 0x3)                     /* and finally rd. This one's a bit different */
-   {
-      case 0:                                     /* dest is fcc. (this must be FCMPQ or FCMPEQ) */
-         if (freg)                                /* V8 has only one set of condition codes, so */
-         {                                        /* anything but 0 in the rd field is an error */
-            *fsr |= (6 << 14);                    /* (should probably flag as invalid opcode */
-            return 0;                             /* but SIGFPE will do :-> ) */
-         }
-         rd = (void *)(fsr);                      /* FCMPQ and FCMPEQ are special and only  */
-         break;                                   /* set bits they're supposed to :-> */
-      case 3:
-         if (freg & 3)                            /* quadwords must have bits 4&5 of the */
-         {                                        /* encoded reg. number set to zero. */
-            *fsr |= (6 << 14);
-            return 0;                             /* simulate invalid_fp_register exception */
-         }
-         /* fall through */
-      case 2:
-         if (freg & 1)                            /* doublewords must have bit 5 zeroed */
-         {
-            *fsr |= (6 << 14);
-            return 0;
-         }
-         /* fall through */
-      case 1:
-         rd = (void *)&fregs[freg];
-         break;
-   }
-#ifdef DEBUG_MATHEMU   
-   printk("executing insn...\n");
-#endif   
-   eflag = func(rd, rs2, rs1);                   /* do the Right Thing */
-   if(eflag == 0)
-	   return 1;                             /* success! */
-   return record_exception(fsr, eflag);
+	*fsr &= ~0x1c000;				/* clear the traptype bits */
+
+	freg = ((insn >> 14) & 0x1f);
+	switch (type & 0x3) {				/* is rs1 single, double or quad? */
+	case 3:
+		if (freg & 3) {				/* quadwords must have bits 4&5 of the */
+							/* encoded reg. number set to zero. */
+			*fsr |= (6 << 14);
+			return 0;			/* simulate invalid_fp_register exception */
+		}
+	/* fall through */
+	case 2:
+		if (freg & 1) {				/* doublewords must have bit 5 zeroed */
+			*fsr |= (6 << 14);
+			return 0;
+		}
+	}
+	rs1 = (void *)&fregs[freg];
+	freg = (insn & 0x1f);
+	switch ((type >> 2) & 0x3) {			/* same again for rs2 */
+	case 3:
+		if (freg & 3) {				/* quadwords must have bits 4&5 of the */
+							/* encoded reg. number set to zero. */
+			*fsr |= (6 << 14);
+			return 0;			/* simulate invalid_fp_register exception */
+		}
+	/* fall through */
+	case 2:
+		if (freg & 1) {				/* doublewords must have bit 5 zeroed */
+			*fsr |= (6 << 14);
+			return 0;
+		}
+	}
+	rs2 = (void *)&fregs[freg];
+	freg = ((insn >> 25) & 0x1f);
+	switch ((type >> 4) & 0x3) {			/* and finally rd. This one's a bit different */
+	case 0:						/* dest is fcc. (this must be one of the FCMP* insns) */
+		if (freg) {				/* V8 has only one set of condition codes, so */
+							/* anything but 0 in the rd field is an error */
+			*fsr |= (6 << 14);		/* (should probably flag as invalid opcode */
+			return 0;			/* but SIGFPE will do :-> ) */
+		}
+		rd = (void *)(fsr);			/* FCMPQ and FCMPEQ are special and only  */
+		break;					/* set bits they're supposed to :-> */
+	case 3:
+		if (freg & 3) {				/* quadwords must have bits 4&5 of the */
+							/* encoded reg. number set to zero. */
+			*fsr |= (6 << 14);
+			return 0;			/* simulate invalid_fp_register exception */
+		}
+	/* fall through */
+	case 2:
+		if (freg & 1) {				/* doublewords must have bit 5 zeroed */
+			*fsr |= (6 << 14);
+			return 0;
+		}
+	/* fall through */
+	case 1:
+		rd = (void *)&fregs[freg];
+		break;
+	}
+#ifdef DEBUG_MATHEMU
+	printk("executing insn...\n");
+#endif
+	eflag = func(rd, rs2, rs1);			/* do the Right Thing */
+	if(eflag == 0)
+		return 1;				/* success! */
+	return record_exception(fsr, eflag);
 }
diff --git a/arch/sparc/math-emu/sfp-machine.h b/arch/sparc/math-emu/sfp-machine.h
index 67a74580c..99448502a 100644
--- a/arch/sparc/math-emu/sfp-machine.h
+++ b/arch/sparc/math-emu/sfp-machine.h
@@ -1,6 +1,11 @@
-/* Machine-dependent software floating-point definitions.  Sparc version.
-   Copyright (C) 1997 Free Software Foundation, Inc.
+/* Machine-dependent software floating-point definitions.
+   Sparc userland (_Q_*) version.
+   Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public License as
@@ -15,382 +20,183 @@
    You should have received a copy of the GNU Library General Public
    License along with the GNU C Library; see the file COPYING.LIB.  If
    not, write to the Free Software Foundation, Inc.,
-   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
 
-   Actually, this is a sparc (32bit) version, written based on the
-   i386 and sparc64 versions, by me, 
-   Peter Maydell (pmaydell@chiark.greenend.org.uk).
-   Comments are by and large also mine, although they may be inaccurate.
-
-   In picking out asm fragments I've gone with the lowest common
-   denominator, which also happens to be the hardware I have :->
-   That is, a SPARC without hardware multiply and divide.
- */
-
-
-/* basic word size definitions */
+#ifndef _SFP_MACHINE_H
+#define _SFP_MACHINE_H
+   
 #define _FP_W_TYPE_SIZE		32
 #define _FP_W_TYPE		unsigned long
 #define _FP_WS_TYPE		signed long
 #define _FP_I_TYPE		long
 
-/* You can optionally code some things like addition in asm. For
- * example, i386 defines __FP_FRAC_ADD_2 as asm. If you don't
- * then you get a fragment of C code [if you change an #ifdef 0
- * in op-2.h] or a call to add_ssaaaa (see below).
- * Good places to look for asm fragments to use are gcc and glibc.
- * gcc's longlong.h is useful.
- */
+#define _FP_MUL_MEAT_S(R,X,Y)	_FP_MUL_MEAT_1_wide(S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y)	_FP_MUL_MEAT_2_wide(D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y)	_FP_MUL_MEAT_4_wide(Q,R,X,Y,umul_ppmm)
 
-/* We need to know how to multiply and divide. If the host word size
- * is >= 2*fracbits you can use FP_MUL_MEAT_n_imm(t,R,X,Y) which
- * codes the multiply with whatever gcc does to 'a * b'.
- * _FP_MUL_MEAT_n_wide(t,R,X,Y,f) is used when you have an asm 
- * function that can multiply two 1W values and get a 2W result. 
- * Otherwise you're stuck with _FP_MUL_MEAT_n_hard(t,R,X,Y) which
- * does bitshifting to avoid overflow.
- * For division there is FP_DIV_MEAT_n_imm(t,R,X,Y,f) for word size
- * >= 2*fracbits, where f is either _FP_DIV_HELP_imm or 
- * _FP_DIV_HELP_ldiv (see op-1.h).
- * _FP_DIV_MEAT_udiv() is if you have asm to do 2W/1W => (1W, 1W).
- * [GCC and glibc have longlong.h which has the asm macro udiv_qrnnd
- * to do this.]
- * In general, 'n' is the number of words required to hold the type,
- * and 't' is either S, D or Q for single/double/quad.
- *           -- PMM
- */
-/* Example: SPARC64:
- * #define _FP_MUL_MEAT_S(R,X,Y)	_FP_MUL_MEAT_1_imm(S,R,X,Y)
- * #define _FP_MUL_MEAT_D(R,X,Y)	_FP_MUL_MEAT_1_wide(D,R,X,Y,umul_ppmm)
- * #define _FP_MUL_MEAT_Q(R,X,Y)	_FP_MUL_MEAT_2_wide(Q,R,X,Y,umul_ppmm)
- *
- * #define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
- * #define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_1_udiv(D,R,X,Y)
- * #define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_2_udiv_64(Q,R,X,Y)
- *
- * Example: i386:
- * #define _FP_MUL_MEAT_S(R,X,Y)   _FP_MUL_MEAT_1_wide(S,R,X,Y,_i386_mul_32_64)
- * #define _FP_MUL_MEAT_D(R,X,Y)   _FP_MUL_MEAT_2_wide(D,R,X,Y,_i386_mul_32_64)
- *
- * #define _FP_DIV_MEAT_S(R,X,Y)   _FP_DIV_MEAT_1_udiv(S,R,X,Y,_i386_div_64_32)
- * #define _FP_DIV_MEAT_D(R,X,Y)   _FP_DIV_MEAT_2_udiv_64(D,R,X,Y)
- */
-#define _FP_MUL_MEAT_S(R,X,Y)   _FP_MUL_MEAT_1_wide(S,R,X,Y,umul_ppmm)
-#define _FP_MUL_MEAT_D(R,X,Y)   _FP_MUL_MEAT_2_wide(D,R,X,Y,umul_ppmm)
-/* FIXME: This is not implemented, but should be soon */
-#define _FP_MUL_MEAT_Q(R,X,Y)   _FP_FRAC_SET_4(R, _FP_ZEROFRAC_4)
-#define _FP_DIV_MEAT_S(R,X,Y)   _FP_DIV_MEAT_1_udiv(S,R,X,Y)
-#define _FP_DIV_MEAT_D(R,X,Y)   _FP_DIV_MEAT_2_udiv_64(D,R,X,Y)
-/* FIXME: This is not implemented, but should be soon */
-#define _FP_DIV_MEAT_Q(R,X,Y)   _FP_FRAC_SET_4(R, _FP_ZEROFRAC_4)
+#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_udiv(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_4_udiv(Q,R,X,Y)
 
-/* These macros define what NaN looks like. They're supposed to expand to 
- * a comma-separated set of 32bit unsigned ints that encode NaN.
- */
-#define _FP_NANFRAC_S		_FP_QNANBIT_S
-#define _FP_NANFRAC_D		_FP_QNANBIT_D, 0
-#define _FP_NANFRAC_Q           _FP_QNANBIT_Q, 0, 0, 0
+#define _FP_NANFRAC_S		((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D		((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q		((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S		0
+#define _FP_NANSIGN_D		0
+#define _FP_NANSIGN_Q		0
 
 #define _FP_KEEPNANFRACP 1
 
-/* This macro appears to be called when both X and Y are NaNs, and 
- * has to choose one and copy it to R. i386 goes for the larger of the
- * two, sparc64 just picks Y. I don't understand this at all so I'll
- * go with sparc64 because it's shorter :->   -- PMM 
+/* If one NaN is signaling and the other is not,
+ * we choose that one, otherwise we choose Y.
+ */
+/* For _Qp_* and _Q_*, this should prefer X, for
+ * CPU instruction emulation this should prefer Y.
+ * (see SPAMv9 B.2.2 section).
  */
 #define _FP_CHOOSENAN(fs, wc, R, X, Y)				\
   do {								\
-    R##_s = Y##_s;						\
-    _FP_FRAC_COPY_##wc(R,Y);					\
+    if ((_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)		\
+	&& !(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs))	\
+      {								\
+	R##_s = X##_s;						\
+	_FP_FRAC_COPY_##wc(R,X);				\
+      }								\
+    else							\
+      {								\
+	R##_s = Y##_s;						\
+	_FP_FRAC_COPY_##wc(R,Y);				\
+      }								\
     R##_c = FP_CLS_NAN;						\
   } while (0)
-  
-#define __FP_UNPACK_RAW_1(fs, X, val)				\
-  do {								\
-    union _FP_UNION_##fs *_flo =				\
-    	(union _FP_UNION_##fs *)val;				\
-								\
-    X##_f = _flo->bits.frac;					\
-    X##_e = _flo->bits.exp;					\
-    X##_s = _flo->bits.sign;					\
-  } while (0)
-
-#define __FP_UNPACK_RAW_2(fs, X, val)			\
-  do {							\
-    union _FP_UNION_##fs *_flo =			\
-    	(union _FP_UNION_##fs *)val;			\
-							\
-    X##_f0 = _flo->bits.frac0;				\
-    X##_f1 = _flo->bits.frac1;				\
-    X##_e  = _flo->bits.exp;				\
-    X##_s  = _flo->bits.sign;				\
-  } while (0)
 
-#define __FP_UNPACK_RAW_4(fs, X, val)			\
-  do {							\
-    union _FP_UNION_##fs *_flo =			\
-    	(union _FP_UNION_##fs *)val;			\
-							\
-    X##_f[0] = _flo->bits.frac0;			\
-    X##_f[1] = _flo->bits.frac1;			\
-    X##_f[2] = _flo->bits.frac2;			\
-    X##_f[3] = _flo->bits.frac3;			\
-    X##_e  = _flo->bits.exp;				\
-    X##_s  = _flo->bits.sign;				\
+/* Some assembly to speed things up. */
+#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)			\
+  __asm__ ("addcc %r7,%8,%2
+	    addxcc %r5,%6,%1
+	    addx %r3,%4,%0"						\
+	   : "=r" ((USItype)(r2)),					\
+	     "=&r" ((USItype)(r1)),					\
+	     "=&r" ((USItype)(r0))					\
+	   : "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc")
+
+#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)			\
+  __asm__ ("subcc %r7,%8,%2
+	    subxcc %r5,%6,%1
+	    subx %r3,%4,%0"						\
+	   : "=r" ((USItype)(r2)),					\
+	     "=&r" ((USItype)(r1)),					\
+	     "=&r" ((USItype)(r0))					\
+	   : "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc")
+
+#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)		\
+  do {									\
+    /* We need to fool gcc,  as we need to pass more than 10		\
+       input/outputs.  */						\
+    register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2");		\
+    __asm__ __volatile__ ("
+	    addcc %r8,%9,%1
+	    addxcc %r6,%7,%0
+	    addxcc %r4,%5,%%g2
+	    addx %r2,%3,%%g1"						\
+	   : "=&r" ((USItype)(r1)),					\
+	     "=&r" ((USItype)(r0))					\
+	   : "%rJ" ((USItype)(x3)),					\
+	     "rI" ((USItype)(y3)),					\
+	     "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc", "g1", "g2");						\
+    __asm__ __volatile__ ("" : "=r" (_t1), "=r" (_t2));			\
+    r3 = _t1; r2 = _t2;							\
   } while (0)
 
-#define __FP_UNPACK_S(X,val)		\
-  do {					\
-    __FP_UNPACK_RAW_1(S,X,val);		\
-    _FP_UNPACK_CANONICAL(S,1,X);	\
+#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)		\
+  do {									\
+    /* We need to fool gcc,  as we need to pass more than 10		\
+       input/outputs.  */						\
+    register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2");		\
+    __asm__ __volatile__ ("
+	    subcc %r8,%9,%1
+	    subxcc %r6,%7,%0
+	    subxcc %r4,%5,%%g2
+	    subx %r2,%3,%%g1"						\
+	   : "=&r" ((USItype)(r1)),					\
+	     "=&r" ((USItype)(r0))					\
+	   : "%rJ" ((USItype)(x3)),					\
+	     "rI" ((USItype)(y3)),					\
+	     "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc", "g1", "g2");						\
+    __asm__ __volatile__ ("" : "=r" (_t1), "=r" (_t2));			\
+    r3 = _t1; r2 = _t2;							\
   } while (0)
 
-#define __FP_UNPACK_D(X,val)		\
-  do {					\
-    __FP_UNPACK_RAW_2(D,X,val);		\
-    _FP_UNPACK_CANONICAL(D,2,X);	\
-  } while (0)
-
-#define __FP_UNPACK_Q(X,val)		\
-  do {					\
-    __FP_UNPACK_RAW_4(Q,X,val);		\
-    _FP_UNPACK_CANONICAL(Q,4,X);	\
-  } while (0)
-
-#define __FP_PACK_RAW_1(fs, val, X)				\
-  do {								\
-    union _FP_UNION_##fs *_flo =				\
-    	(union _FP_UNION_##fs *)val;				\
-								\
-    _flo->bits.frac = X##_f;					\
-    _flo->bits.exp  = X##_e;					\
-    _flo->bits.sign = X##_s;					\
-  } while (0)
-  
-#define __FP_PACK_RAW_2(fs, val, X)			\
-  do {							\
-    union _FP_UNION_##fs *_flo =			\
-    	(union _FP_UNION_##fs *)val;			\
-							\
-    _flo->bits.frac0 = X##_f0;				\
-    _flo->bits.frac1 = X##_f1;				\
-    _flo->bits.exp   = X##_e;				\
-    _flo->bits.sign  = X##_s;				\
-  } while (0)
-
-#define __FP_PACK_RAW_4(fs, val, X)			\
-  do {							\
-    union _FP_UNION_##fs *_flo =			\
-    	(union _FP_UNION_##fs *)val;			\
-							\
-    _flo->bits.frac0 = X##_f[0];			\
-    _flo->bits.frac1 = X##_f[1];			\
-    _flo->bits.frac2 = X##_f[2];			\
-    _flo->bits.frac3 = X##_f[3];			\
-    _flo->bits.exp   = X##_e;				\
-    _flo->bits.sign  = X##_s;				\
-  } while (0)
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-
-/* We only actually write to the destination register
- * if exceptions signalled (if any) will not trap.
- */
-#ifdef __SMP__
-#define __FPU_TEM \
-	(((current->tss.fsr)>>23)&0x1f)
-#else
+#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0)
+
+#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) __FP_FRAC_SUB_4(x3,x2,x1,x0,x3,x2,x1,x0,y3,y2,y1,y0)
+
+#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)					\
+  __asm__ ("addcc %3,%4,%3
+	    addxcc %2,%%g0,%2
+	    addxcc %1,%%g0,%1
+	    addx %0,%%g0,%0"						\
+	   : "=&r" ((USItype)(x3)),					\
+	     "=&r" ((USItype)(x2)),					\
+	     "=&r" ((USItype)(x1)),					\
+	     "=&r" ((USItype)(x0))					\
+	   : "rI" ((USItype)(i)),					\
+	     "0" ((USItype)(x3)),					\
+	     "1" ((USItype)(x2)),					\
+	     "2" ((USItype)(x1)),					\
+	     "3" ((USItype)(x0))					\
+	   : "cc")
+
+#ifndef __SMP__
 extern struct task_struct *last_task_used_math;
-#define __FPU_TEM \
-	(((last_task_used_math->tss.fsr)>>23)&0x1f)
 #endif
-#define __FPU_TRAP_P(bits) \
-	((__FPU_TEM & (bits)) != 0)
-
-#define __FP_PACK_S(val,X)			\
-({  int __exc = _FP_PACK_CANONICAL(S,1,X);	\
-    if(!__exc || !__FPU_TRAP_P(__exc))		\
-        __FP_PACK_RAW_1(S,val,X);		\
-    __exc;					\
-})
-
-#define __FP_PACK_D(val,X)			\
-({  int __exc = _FP_PACK_CANONICAL(D,2,X);	\
-    if(!__exc || !__FPU_TRAP_P(__exc))		\
-        __FP_PACK_RAW_2(D,val,X);		\
-    __exc;					\
-})
-
-#define __FP_PACK_Q(val,X)			\
-({  int __exc = _FP_PACK_CANONICAL(Q,4,X);	\
-    if(!__exc || !__FPU_TRAP_P(__exc))		\
-        __FP_PACK_RAW_4(Q,val,X);		\
-    __exc;					\
-})
 
 /* Obtain the current rounding mode. */
+#ifndef FP_ROUNDMODE
 #ifdef __SMP__
 #define FP_ROUNDMODE	((current->tss.fsr >> 30) & 0x3)
 #else
 #define FP_ROUNDMODE	((last_task_used_math->tss.fsr >> 30) & 0x3)
 #endif
+#endif
 
-/* the asm fragments go here: all these are taken from glibc-2.0.5's stdlib/longlong.h */
-
-#include <linux/types.h>
-#include <asm/byteorder.h>
-
-/* add_ssaaaa is used in op-2.h and should be equivalent to
- * #define add_ssaaaa(sh,sl,ah,al,bh,bl) (sh = ah+bh+ (( sl = al+bl) < al))
- * add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
- * high_addend_2, low_addend_2) adds two UWtype integers, composed by
- * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
- * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
- * (i.e. carry out) is not stored anywhere, and is lost.
- */
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("addcc %r4,%5,%1
-        addx %r2,%3,%0"                                                 \
-           : "=r" ((USItype)(sh)),                                      \
-             "=&r" ((USItype)(sl))                                      \
-           : "%rJ" ((USItype)(ah)),                                     \
-             "rI" ((USItype)(bh)),                                      \
-             "%rJ" ((USItype)(al)),                                     \
-             "rI" ((USItype)(bl))                                       \
-           : "cc")
-
-
-/* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to
- * #define sub_ddmmss(sh, sl, ah, al, bh, bl) (sh = ah-bh - ((sl = al-bl) > al))
- * sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
- * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
- * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
- * LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
- * and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
- * and is lost.
- */
-
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("subcc %r4,%5,%1
-        subx %r2,%3,%0"                                                 \
-           : "=r" ((USItype)(sh)),                                      \
-             "=&r" ((USItype)(sl))                                      \
-           : "rJ" ((USItype)(ah)),                                      \
-             "rI" ((USItype)(bh)),                                      \
-             "rJ" ((USItype)(al)),                                      \
-             "rI" ((USItype)(bl))                                       \
-           : "cc")
-
-
-/* asm fragments for mul and div */	 
-/* umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two
- * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
- * word product in HIGH_PROD and LOW_PROD.
- * These look ugly because the sun4/4c don't have umul/udiv/smul/sdiv in
- * hardware. 
- */
-#define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("! Inlined umul_ppmm
-        wr      %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr
-        sra     %3,31,%%g2      ! Don't move this insn
-        and     %2,%%g2,%%g2    ! Don't move this insn
-        andcc   %%g0,0,%%g1     ! Don't move this insn
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,%3,%%g1
-        mulscc  %%g1,0,%%g1
-        add     %%g1,%%g2,%0
-        rd      %%y,%1"                                                 \
-           : "=r" ((USItype)(w1)),                                      \
-             "=r" ((USItype)(w0))                                       \
-           : "%rI" ((USItype)(u)),                                      \
-             "r" ((USItype)(v))                                         \
-           : "%g1", "%g2", "cc")
-
-/* udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
- * denominator) divides a UDWtype, composed by the UWtype integers
- * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
- * in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
- * than DENOMINATOR for correct operation.  If, in addition, the most
- * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
- * UDIV_NEEDS_NORMALIZATION is defined to 1.
- */
-
-#define udiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("! Inlined udiv_qrnnd
-        mov     32,%%g1
-        subcc   %1,%2,%%g0
-1:      bcs     5f
-         addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb
-        sub     %1,%2,%1        ! this kills msb of n
-        addx    %1,%1,%1        ! so this can't give carry
-        subcc   %%g1,1,%%g1
-2:      bne     1b
-         subcc  %1,%2,%%g0
-        bcs     3f
-         addxcc %0,%0,%0        ! shift n1n0 and a q-bit in lsb
-        b       3f
-         sub    %1,%2,%1        ! this kills msb of n
-4:      sub     %1,%2,%1
-5:      addxcc  %1,%1,%1
-        bcc     2b
-         subcc  %%g1,1,%%g1
-! Got carry from n.  Subtract next step to cancel this carry.
-        bne     4b
-         addcc  %0,%0,%0        ! shift n1n0 and a 0-bit in lsb
-        sub     %1,%2,%1
-3:      xnor    %0,0,%0
-        ! End of inline udiv_qrnnd"                                     \
-           : "=&r" ((USItype) (q)),                                     \
-             "=&r" ((USItype) (r))                                      \
-           : "r" ((USItype) (d)),                                       \
-             "1" ((USItype) (n1)),                                      \
-             "0" ((USItype) (n0)) : "%g1", "cc")
-
-#define UDIV_NEEDS_NORMALIZATION 0
+/* Exception flags. */
+#define FP_EX_INVALID		(1 << 4)
+#define FP_EX_OVERFLOW		(1 << 3)
+#define FP_EX_UNDERFLOW		(1 << 2)
+#define FP_EX_DIVZERO		(1 << 1)
+#define FP_EX_INEXACT		(1 << 0)
 
-#define abort()								\
-	return 0
+#define FP_HANDLE_EXCEPTIONS return _fex
 
-#ifdef __BIG_ENDIAN
-#define __BYTE_ORDER __BIG_ENDIAN
+#ifdef __SMP__
+#define FP_INHIBIT_RESULTS ((current->tss.fsr >> 23) & _fex)
 #else
-#define __BYTE_ORDER __LITTLE_ENDIAN
+#define FP_INHIBIT_RESULTS ((last_task_used_math->tss.fsr >> 23) & _fex)
 #endif
 
-/* Exception flags. */
-#define EFLAG_INVALID		(1 << 4)
-#define EFLAG_OVERFLOW		(1 << 3)
-#define EFLAG_UNDERFLOW		(1 << 2)
-#define EFLAG_DIVZERO		(1 << 1)
-#define EFLAG_INEXACT		(1 << 0)
+#endif
diff --git a/arch/sparc/math-emu/sfp-util.h b/arch/sparc/math-emu/sfp-util.h
new file mode 100644
index 000000000..75ec69124
--- /dev/null
+++ b/arch/sparc/math-emu/sfp-util.h
@@ -0,0 +1,115 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+/* add_ssaaaa: two-word add, (sh,sl) = (ah,al) + (bh,bl); addx folds the carry from the low-word addcc into the high word. */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) 				\
+  __asm__ ("addcc %r4,%5,%1
+	addx %r2,%3,%0"							\
+	   : "=r" ((USItype)(sh)),					\
+	     "=&r" ((USItype)(sl))					\
+	   : "%rJ" ((USItype)(ah)),					\
+	     "rI" ((USItype)(bh)),					\
+	     "%rJ" ((USItype)(al)),					\
+	     "rI" ((USItype)(bl))					\
+	   : "cc")
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* two-word subtract: (sh,sl) = (ah,al) - (bh,bl), borrow carried by subx */	\
+  __asm__ ("subcc %r4,%5,%1
+	subx %r2,%3,%0"							\
+	   : "=r" ((USItype)(sh)),					\
+	     "=&r" ((USItype)(sl))					\
+	   : "rJ" ((USItype)(ah)),					\
+	     "rI" ((USItype)(bh)),					\
+	     "rJ" ((USItype)(al)),					\
+	     "rI" ((USItype)(bl))					\
+	   : "cc")
+/* umul_ppmm: 32x32 -> 64-bit multiply via 32 mulscc steps; w1 = high word, w0 = low word read back from %y. Clobbers %g1, %g2, cc. */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("! Inlined umul_ppmm
+	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr
+	sra	%3,31,%%g2	! Don't move this insn
+	and	%2,%%g2,%%g2	! Don't move this insn
+	andcc	%%g0,0,%%g1	! Don't move this insn
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,%3,%%g1
+	mulscc	%%g1,0,%%g1
+	add	%%g1,%%g2,%0
+	rd	%%y,%1"							\
+	   : "=r" ((USItype)(w1)),					\
+	     "=r" ((USItype)(w0))					\
+	   : "%rI" ((USItype)(u)),					\
+	     "r" ((USItype)(v))						\
+	   : "%g1", "%g2", "cc")
+/* udiv_qrnnd(q, r, n1, n0, d): divide the two-word value (n1,n0) by d with a
+   32-step shift/subtract loop; quotient goes to q, remainder to r (uses %g1, cc).
+   Written in assembler because the default C udiv_qrnnd is over 10 times slower. */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("! Inlined udiv_qrnnd
+	mov	32,%%g1
+	subcc	%1,%2,%%g0
+1:	bcs	5f
+	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb
+	sub	%1,%2,%1	! this kills msb of n
+	addx	%1,%1,%1	! so this can't give carry
+	subcc	%%g1,1,%%g1
+2:	bne	1b
+	 subcc	%1,%2,%%g0
+	bcs	3f
+	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb
+	b	3f
+	 sub	%1,%2,%1	! this kills msb of n
+4:	sub	%1,%2,%1
+5:	addxcc	%1,%1,%1
+	bcc	2b
+	 subcc	%%g1,1,%%g1
+! Got carry from n.  Subtract next step to cancel this carry.
+	bne	4b
+	 addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb
+	sub	%1,%2,%1
+3:	xnor	%0,0,%0
+	! End of inline udiv_qrnnd"					\
+	   : "=&r" ((USItype)(q)),					\
+	     "=&r" ((USItype)(r))					\
+	   : "r" ((USItype)(d)),					\
+	     "1" ((USItype)(n1)),					\
+	     "0" ((USItype)(n0)) : "%g1", "cc")
+#define UDIV_NEEDS_NORMALIZATION 0	/* per the longlong.h convention: 0 = udiv_qrnnd does not need d's top bit set */
+/* Code that calls abort() instead returns 0 from the enclosing function. */
+#define abort()								\
+	return 0
+/* Set __BYTE_ORDER according to whether __BIG_ENDIAN is defined. */
+#ifdef __BIG_ENDIAN
+#define __BYTE_ORDER __BIG_ENDIAN
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#endif