diff options
Diffstat (limited to 'arch/sparc/math-emu')
-rw-r--r-- | arch/sparc/math-emu/Makefile | 6 | ||||
-rw-r--r-- | arch/sparc/math-emu/fabss.c | 8 | ||||
-rw-r--r-- | arch/sparc/math-emu/fcmpd.c | 33 | ||||
-rw-r--r-- | arch/sparc/math-emu/fcmped.c | 33 | ||||
-rw-r--r-- | arch/sparc/math-emu/fcmpeq.c | 29 | ||||
-rw-r--r-- | arch/sparc/math-emu/fcmpes.c | 33 | ||||
-rw-r--r-- | arch/sparc/math-emu/fcmpq.c | 29 | ||||
-rw-r--r-- | arch/sparc/math-emu/fcmps.c | 33 | ||||
-rw-r--r-- | arch/sparc/math-emu/fdmulq.c | 17 | ||||
-rw-r--r-- | arch/sparc/math-emu/fdtoq.c | 15 | ||||
-rw-r--r-- | arch/sparc/math-emu/fdtos.c | 15 | ||||
-rw-r--r-- | arch/sparc/math-emu/fmovs.c | 7 | ||||
-rw-r--r-- | arch/sparc/math-emu/fnegs.c | 7 | ||||
-rw-r--r-- | arch/sparc/math-emu/fqtod.c | 15 | ||||
-rw-r--r-- | arch/sparc/math-emu/fqtos.c | 15 | ||||
-rw-r--r-- | arch/sparc/math-emu/fsmuld.c | 17 | ||||
-rw-r--r-- | arch/sparc/math-emu/fstod.c | 15 | ||||
-rw-r--r-- | arch/sparc/math-emu/fstoq.c | 15 | ||||
-rw-r--r-- | arch/sparc/math-emu/math.c | 551 | ||||
-rw-r--r-- | arch/sparc/math-emu/sfp-machine.h | 496 | ||||
-rw-r--r-- | arch/sparc/math-emu/sfp-util.h | 115 |
21 files changed, 767 insertions, 737 deletions
diff --git a/arch/sparc/math-emu/Makefile b/arch/sparc/math-emu/Makefile index d7642b2e9..b85e48924 100644 --- a/arch/sparc/math-emu/Makefile +++ b/arch/sparc/math-emu/Makefile @@ -14,13 +14,13 @@ O_OBJS := math.o ashldi3.o fabss.o faddd.o faddq.o fadds.o \ fdtos.o fitoq.o fmovs.o fmuld.o fmulq.o fmuls.o \ fnegs.o fqtod.o fqtoi.o fqtos.o fsmuld.o fsqrtd.o \ fsqrtq.o fsqrts.o fstod.o fstoi.o fstoq.o fsubd.o \ - fsubq.o fsubs.o udivmodti4.o + fsubq.o fsubs.o LINKS := double.h faddd.c faddq.c fadds.c fdivd.c fdivq.c fdivs.c \ fdtoi.c fitoq.c fmuld.c fmulq.c fmuls.c fqtoi.c \ fsqrtd.c fsqrtq.c fsqrts.c fstoi.c fsubd.c \ - fsubq.c fsubs.c op-1.h op-2.h op-4.h op-common.h quad.h \ - single.h soft-fp.h udivmodti4.c + fsubq.c fsubs.c op-1.h op-2.h op-4.h op-8.h \ + op-common.h quad.h single.h soft-fp.h .S.s: $(CPP) -D__ASSEMBLY__ -ansi $< -o $*.s diff --git a/arch/sparc/math-emu/fabss.c b/arch/sparc/math-emu/fabss.c index 5429cc733..7af16b7f9 100644 --- a/arch/sparc/math-emu/fabss.c +++ b/arch/sparc/math-emu/fabss.c @@ -1,6 +1,12 @@ +/* $Id: fabss.c,v 1.8 1999/05/28 13:41:33 jj Exp $ + * arch/sparc/math-emu/fabss.c + * + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + int FABSS(unsigned long *rd, unsigned long *rs2) { - /* Clear the sign bit (high bit of word 0) */ rd[0] = rs2[0] & 0x7fffffffUL; return 0; } diff --git a/arch/sparc/math-emu/fcmpd.c b/arch/sparc/math-emu/fcmpd.c index 8adb30d88..05a460eee 100644 --- a/arch/sparc/math-emu/fcmpd.c +++ b/arch/sparc/math-emu/fcmpd.c @@ -1,18 +1,33 @@ +/* $Id: fcmpd.c,v 1.8 1999/05/28 13:41:36 jj Exp $ + * arch/sparc/math-emu/fcmpd.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "double.h" int FCMPD(void *rd, void *rs2, void *rs1) { + FP_DECL_EX; FP_DECL_D(A); FP_DECL_D(B); long ret; - unsigned long *fsr = rd; + unsigned long fsr; - __FP_UNPACK_D(A, rs1); - __FP_UNPACK_D(B, rs2); - FP_CMP_D(ret, B, A, 2); - if (ret == -1) - ret = 2; - - *fsr = (*fsr & ~0xc00) | (ret << 10); - return 0; + FP_UNPACK_RAW_DP(A, rs1); + FP_UNPACK_RAW_DP(B, rs2); + FP_CMP_D(ret, B, A, 3); + if (ret == 3 && (FP_ISSIGNAN_D(A) || FP_ISSIGNAN_D(B))) + FP_SET_EXCEPTION(FP_EX_INVALID); + if (!FP_INHIBIT_RESULTS) { + if (ret == -1) ret = 2; + fsr = *(long *)rd; + fsr &= ~0xc00; + fsr |= (ret << 10); + *(long *)rd = fsr; + } + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fcmped.c b/arch/sparc/math-emu/fcmped.c index 2033b1dc8..5b634da28 100644 --- a/arch/sparc/math-emu/fcmped.c +++ b/arch/sparc/math-emu/fcmped.c @@ -1,18 +1,33 @@ +/* $Id: fcmped.c,v 1.8 1999/05/28 13:41:38 jj Exp $ + * arch/sparc/math-emu/fcmped.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "double.h" int FCMPED(void *rd, void *rs2, void *rs1) { + FP_DECL_EX; FP_DECL_D(A); FP_DECL_D(B); long ret; - unsigned long *fsr = rd; + unsigned long fsr; - __FP_UNPACK_D(A, rs1); - __FP_UNPACK_D(B, rs2); - FP_CMP_D(ret, B, A, 2); - if (ret == -1) - ret = 2; - - *fsr = (*fsr & ~0xc00) | (ret << 10); - return 0; + FP_UNPACK_RAW_DP(A, rs1); + FP_UNPACK_RAW_DP(B, rs2); + FP_CMP_D(ret, B, A, 3); + if (ret == 3) + FP_SET_EXCEPTION(FP_EX_INVALID); + if (!FP_INHIBIT_RESULTS) { + if (ret == -1) ret = 2; + fsr = *(long *)rd; + fsr &= ~0xc00; + fsr |= (ret << 10); + *(long *)rd = fsr; + } + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fcmpeq.c b/arch/sparc/math-emu/fcmpeq.c index de99bf343..eb76019ab 100644 --- a/arch/sparc/math-emu/fcmpeq.c +++ b/arch/sparc/math-emu/fcmpeq.c @@ -1,18 +1,33 @@ +/* $Id: fcmpeq.c,v 1.8 1999/05/28 13:41:42 jj Exp $ + * arch/sparc/math-emu/fcmpeq.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "quad.h" int FCMPEQ(void *rd, void *rs2, void *rs1) { + FP_DECL_EX; FP_DECL_Q(A); FP_DECL_Q(B); long ret; unsigned long fsr; - __FP_UNPACK_Q(A, rs1); - __FP_UNPACK_Q(B, rs2); + FP_UNPACK_RAW_QP(A, rs1); + FP_UNPACK_RAW_QP(B, rs2); FP_CMP_Q(ret, B, A, 3); - if (ret == -1) ret = 2; - fsr = *(unsigned long *)rd; - fsr &= ~0xc00; fsr |= (ret << 10); - *(unsigned long *)rd = fsr; - return 0; + if (ret == 3) + FP_SET_EXCEPTION(FP_EX_INVALID); + if (!FP_INHIBIT_RESULTS) { + if (ret == -1) ret = 2; + fsr = *(long *)rd; + fsr &= ~0xc00; + fsr |= (ret << 10); + *(long *)rd = fsr; + } + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fcmpes.c b/arch/sparc/math-emu/fcmpes.c index a078a1243..365937edc 100644 --- a/arch/sparc/math-emu/fcmpes.c +++ b/arch/sparc/math-emu/fcmpes.c @@ -1,18 +1,33 @@ +/* $Id: fcmpes.c,v 1.8 1999/05/28 13:41:45 jj Exp $ + * arch/sparc/math-emu/fcmpes.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "single.h" int FCMPES(void *rd, void *rs2, void *rs1) { + FP_DECL_EX; FP_DECL_S(A); FP_DECL_S(B); long ret; - unsigned long *fsr = rd; + unsigned long fsr; - __FP_UNPACK_S(A, rs1); - __FP_UNPACK_S(B, rs2); - FP_CMP_S(ret, B, A, 1); - if (ret == -1) - ret = 2; - - *fsr = (*fsr & ~0xc00) | (ret << 10); - return 0; + FP_UNPACK_RAW_SP(A, rs1); + FP_UNPACK_RAW_SP(B, rs2); + FP_CMP_S(ret, B, A, 3); + if (ret == 3) + FP_SET_EXCEPTION(FP_EX_INVALID); + if (!FP_INHIBIT_RESULTS) { + if (ret == -1) ret = 2; + fsr = *(long *)rd; + fsr &= ~0xc00; + fsr |= (ret << 10); + *(long *)rd = fsr; + } + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fcmpq.c b/arch/sparc/math-emu/fcmpq.c index f3d1b1233..ffaeb1a1e 100644 --- a/arch/sparc/math-emu/fcmpq.c +++ b/arch/sparc/math-emu/fcmpq.c @@ -1,18 +1,33 @@ +/* $Id: fcmpq.c,v 1.8 1999/05/28 13:41:48 jj Exp $ + * arch/sparc/math-emu/fcmpq.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "quad.h" int FCMPQ(void *rd, void *rs2, void *rs1) { + FP_DECL_EX; FP_DECL_Q(A); FP_DECL_Q(B); long ret; unsigned long fsr; - __FP_UNPACK_Q(A, rs1); - __FP_UNPACK_Q(B, rs2); + FP_UNPACK_RAW_QP(A, rs1); + FP_UNPACK_RAW_QP(B, rs2); FP_CMP_Q(ret, B, A, 3); - if (ret == -1) ret = 2; - fsr = *(unsigned long *)rd; - fsr &= ~0xc00; fsr |= (ret << 10); - *(unsigned long *)rd = fsr; - return 0; + if (ret == 3 && (FP_ISSIGNAN_Q(A) || FP_ISSIGNAN_Q(B))) + FP_SET_EXCEPTION(FP_EX_INVALID); + if (!FP_INHIBIT_RESULTS) { + if (ret == -1) ret = 2; + fsr = *(long *)rd; + fsr &= ~0xc00; + fsr |= (ret << 10); + *(long *)rd = fsr; + } + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fcmps.c b/arch/sparc/math-emu/fcmps.c index 7e273320f..3be1315d7 100644 --- a/arch/sparc/math-emu/fcmps.c +++ b/arch/sparc/math-emu/fcmps.c @@ -1,18 +1,33 @@ +/* $Id: fcmps.c,v 1.8 1999/05/28 13:41:51 jj Exp $ + * arch/sparc/math-emu/fcmps.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "single.h" int FCMPS(void *rd, void *rs2, void *rs1) { + FP_DECL_EX; FP_DECL_S(A); FP_DECL_S(B); long ret; - unsigned long *fsr = rd; + unsigned long fsr; - __FP_UNPACK_S(A, rs1); - __FP_UNPACK_S(B, rs2); - FP_CMP_S(ret, B, A, 1); - if (ret == -1) - ret = 2; - - *fsr = (*fsr & ~0xc00) | (ret << 10); - return 0; + FP_UNPACK_RAW_SP(A, rs1); + FP_UNPACK_RAW_SP(B, rs2); + FP_CMP_S(ret, B, A, 3); + if (ret == 3 && (FP_ISSIGNAN_S(A) || FP_ISSIGNAN_S(B))) + FP_SET_EXCEPTION(FP_EX_INVALID); + if (!FP_INHIBIT_RESULTS) { + if (ret == -1) ret = 2; + fsr = *(long *)rd; + fsr &= ~0xc00; + fsr |= (ret << 10); + *(long *)rd = fsr; + } + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fdmulq.c b/arch/sparc/math-emu/fdmulq.c index dd9c7953c..96772d4ef 100644 --- a/arch/sparc/math-emu/fdmulq.c +++ b/arch/sparc/math-emu/fdmulq.c @@ -1,15 +1,26 @@ +/* $Id: fdmulq.c,v 1.9 1999/05/28 13:41:56 jj Exp $ + * arch/sparc/math-emu/fdmulq.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "quad.h" #include "double.h" int FDMULQ(void *rd, void *rs2, void *rs1) { + FP_DECL_EX; FP_DECL_D(IN); FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(R); - __FP_UNPACK_D(IN, rs1); + FP_UNPACK_DP(IN, rs1); FP_CONV(Q,D,4,2,A,IN); - __FP_UNPACK_D(IN, rs2); + FP_UNPACK_DP(IN, rs2); FP_CONV(Q,D,4,2,B,IN); FP_MUL_Q(R, A, B); - return __FP_PACK_Q(rd, R); + FP_PACK_QP(rd, R); + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fdtoq.c b/arch/sparc/math-emu/fdtoq.c index 7b7746821..701b1ff3f 100644 --- a/arch/sparc/math-emu/fdtoq.c +++ b/arch/sparc/math-emu/fdtoq.c @@ -1,12 +1,23 @@ +/* $Id: fdtoq.c,v 1.9 1999/05/28 13:42:01 jj Exp $ + * arch/sparc/math-emu/fdtoq.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "quad.h" #include "double.h" int FDTOQ(void *rd, void *rs2) { + FP_DECL_EX; FP_DECL_D(A); FP_DECL_Q(R); - __FP_UNPACK_D(A, rs2); + FP_UNPACK_DP(A, rs2); FP_CONV(Q,D,4,2,R,A); - return __FP_PACK_Q(rd, R); + FP_PACK_QP(rd, R); + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fdtos.c b/arch/sparc/math-emu/fdtos.c index 612434c40..63951befa 100644 --- a/arch/sparc/math-emu/fdtos.c +++ b/arch/sparc/math-emu/fdtos.c @@ -1,12 +1,23 @@ +/* $Id: fdtos.c,v 1.9 1999/05/28 13:42:03 jj Exp $ + * arch/sparc/math-emu/fdtos.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "double.h" #include "single.h" int FDTOS(void *rd, void *rs2) { + FP_DECL_EX; FP_DECL_D(A); FP_DECL_S(R); - __FP_UNPACK_D(A, rs2); + FP_UNPACK_DP(A, rs2); FP_CONV(S,D,1,2,R,A); - return __FP_PACK_S(rd, R); + FP_PACK_SP(rd, R); + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fmovs.c b/arch/sparc/math-emu/fmovs.c index f113c0bb1..77087a524 100644 --- a/arch/sparc/math-emu/fmovs.c +++ b/arch/sparc/math-emu/fmovs.c @@ -1,3 +1,10 @@ +/* $Id: fmovs.c,v 1.7 1999/05/28 13:42:05 jj Exp $ + * arch/sparc/math-emu/fmovs.c + * + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + int FMOVS(unsigned long *rd, unsigned long *rs2) { rd[0] = rs2[0]; diff --git a/arch/sparc/math-emu/fnegs.c b/arch/sparc/math-emu/fnegs.c index 26a90d778..da2d36ef5 100644 --- a/arch/sparc/math-emu/fnegs.c +++ b/arch/sparc/math-emu/fnegs.c @@ -1,3 +1,10 @@ +/* $Id: fnegs.c,v 1.9 1999/05/28 13:42:06 jj Exp $ + * arch/sparc/math-emu/fnegs.c + * + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + int FNEGS(unsigned long *rd, unsigned long *rs2) { /* just change the sign bit */ diff --git a/arch/sparc/math-emu/fqtod.c b/arch/sparc/math-emu/fqtod.c index 62a437e31..c8aa8edf1 100644 --- a/arch/sparc/math-emu/fqtod.c +++ b/arch/sparc/math-emu/fqtod.c @@ -1,12 +1,23 @@ +/* $Id: fqtod.c,v 1.9 1999/05/28 13:42:08 jj Exp $ + * arch/sparc/math-emu/fqtod.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "quad.h" #include "double.h" int FQTOD(void *rd, void *rs2) { + FP_DECL_EX; FP_DECL_Q(A); FP_DECL_D(R); - __FP_UNPACK_Q(A, rs2); + FP_UNPACK_QP(A, rs2); FP_CONV(D,Q,2,4,R,A); - return __FP_PACK_D(rd, R); + FP_PACK_DP(rd, R); + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fqtos.c b/arch/sparc/math-emu/fqtos.c index 2520affbf..142dfa9a8 100644 --- a/arch/sparc/math-emu/fqtos.c +++ b/arch/sparc/math-emu/fqtos.c @@ -1,12 +1,23 @@ +/* $Id: fqtos.c,v 1.9 1999/05/28 13:42:10 jj Exp $ + * arch/sparc/math-emu/fqtos.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "quad.h" #include "single.h" int FQTOS(void *rd, void *rs2) { + FP_DECL_EX; FP_DECL_Q(A); FP_DECL_S(R); - __FP_UNPACK_Q(A, rs2); + FP_UNPACK_QP(A, rs2); FP_CONV(S,Q,1,4,R,A); - return __FP_PACK_S(rd, R); + FP_PACK_SP(rd, R); + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fsmuld.c b/arch/sparc/math-emu/fsmuld.c index b7b992818..2f873e501 100644 --- a/arch/sparc/math-emu/fsmuld.c +++ b/arch/sparc/math-emu/fsmuld.c @@ -1,15 +1,26 @@ +/* $Id: fsmuld.c,v 1.9 1999/05/28 13:42:12 jj Exp $ + * arch/sparc/math-emu/fsmuld.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "double.h" #include "single.h" int FSMULD(void *rd, void *rs2, void *rs1) { + FP_DECL_EX; FP_DECL_S(IN); FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(R); - __FP_UNPACK_S(IN, rs1); + FP_UNPACK_SP(IN, rs1); FP_CONV(D,S,2,1,A,IN); - __FP_UNPACK_S(IN, rs2); + FP_UNPACK_SP(IN, rs2); FP_CONV(D,S,2,1,B,IN); FP_MUL_D(R, A, B); - return __FP_PACK_D(rd, R); + FP_PACK_DP(rd, R); + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fstod.c b/arch/sparc/math-emu/fstod.c index ea73660d8..a6206bf20 100644 --- a/arch/sparc/math-emu/fstod.c +++ b/arch/sparc/math-emu/fstod.c @@ -1,12 +1,23 @@ +/* $Id: fstod.c,v 1.9 1999/05/28 13:42:14 jj Exp $ + * arch/sparc/math-emu/fstod.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "double.h" #include "single.h" int FSTOD(void *rd, void *rs2) { + FP_DECL_EX; FP_DECL_S(A); FP_DECL_D(R); - __FP_UNPACK_S(A, rs2); + FP_UNPACK_SP(A, rs2); FP_CONV(D,S,2,1,R,A); - return __FP_PACK_D(rd, R); + FP_PACK_DP(rd, R); + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/fstoq.c b/arch/sparc/math-emu/fstoq.c index 7d201310c..e2257c214 100644 --- a/arch/sparc/math-emu/fstoq.c +++ b/arch/sparc/math-emu/fstoq.c @@ -1,12 +1,23 @@ +/* $Id: fstoq.c,v 1.9 1999/05/28 13:42:16 jj Exp $ + * arch/sparc/math-emu/fstoq.c + * + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) + * + */ + +#include "sfp-util.h" #include "soft-fp.h" #include "quad.h" #include "single.h" int FSTOQ(void *rd, void *rs2) { + FP_DECL_EX; FP_DECL_S(A); FP_DECL_Q(R); - __FP_UNPACK_S(A, rs2); + FP_UNPACK_SP(A, rs2); FP_CONV(Q,S,4,1,R,A); - return __FP_PACK_Q(rd, R); + FP_PACK_QP(rd, R); + FP_HANDLE_EXCEPTIONS; } diff --git a/arch/sparc/math-emu/math.c b/arch/sparc/math-emu/math.c index 68ccb932a..15690b21e 100644 --- a/arch/sparc/math-emu/math.c +++ b/arch/sparc/math-emu/math.c @@ -1,26 +1,19 @@ -/* +/* * arch/sparc/math-emu/math.c * * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk) - * Based on the sparc64 code by Jakub Jelinek. + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + * Copyright (C) 1999 David S. Miller (davem@redhat.com) * * This is a good place to start if you're trying to understand the - * emulation code, because it's pretty simple. What we do is + * emulation code, because it's pretty simple. What we do is * essentially analyse the instruction to work out what the operation * is and which registers are involved. We then execute the appropriate * FXXXX function. [The floating point queue introduces a minor wrinkle; * see below...] * The fxxxxx.c files each emulate a single insn. They look relatively * simple because the complexity is hidden away in an unholy tangle - * of preprocessor macros. - * - * WARNING : don't look at the macro definitions unless you - * absolutely have to! They're extremely ugly, rather complicated - * and a single line in an fxxxx.c file can expand to the equivalent - * of 30 lines or more of C. Of course, any error in those 30 lines - * is reported by the compiler as an error in the single line with the - * macro usage... - * Question: should we replace them with inline functions? + * of preprocessor macros. * * The first layer of macros is single.h, double.h, quad.h. Generally * these files define macros for working with floating point numbers @@ -29,11 +22,11 @@ * generic macros (in this case _FP_ADD(D,2,R,X,Y) where the number * of machine words required to store the given IEEE format is passed * as a parameter. [double.h and co check the number of bits in a word - * and define FP_ADD_D & co appropriately]. + * and define FP_ADD_D & co appropriately]. * The generic macros are defined in op-common.h. This is where all * the grotty stuff like handling NaNs is coded. To handle the possible * word sizes macros in op-common.h use macros like _FP_FRAC_SLL_##wc() - * where wc is the 'number of machine words' parameter (here 2). + * where wc is the 'number of machine words' parameter (here 2). * These are defined in the third layer of macros: op-1.h, op-2.h * and op-4.h. These handle operations on floating point numbers composed * of 1,2 and 4 machine words respectively. [For example, on sparc64 @@ -41,7 +34,7 @@ * constructs in op-1.h, but on sparc32 they use op-2.h definitions.] * soft-fp.h is on the same level as op-common.h, and defines some * macros which are independent of both word size and FP format. - * Finally, sfp-machine.h is the machine dependent part of the + * Finally, sfp-machine.h is the machine dependent part of the * code: it defines the word size and what type a word is. It also * defines how _FP_MUL_MEAT_t() maps to _FP_MUL_MEAT_n_* : op-n.h * provide several possible flavours of multiply algorithm, most @@ -64,59 +57,11 @@ * so we follow that practice... */ -/* WISHLIST: - * - * + Replace all the macros with inline functions. These should - * have the same effect but be much easier to work with. - * - * + Emulate the IEEE exception flags. We don't currently do this - * because a) it would require significant alterations to - * the emulation macros [see the comments about _FP_NEG() - * in op-common.c and note that we'd need to invent a convention - * for passing in the flags to FXXXX fns and returning them] and - * b) SPARClinux doesn't let users access the flags anyway - * [contrast Solaris, which allows you to examine, clear or set - * the flags, and request that exceptions cause SIGFPE - * [which you then set up a signal handler for, obviously...]]. - * Erm, (b) may quite possibly be garbage. %fsr is user-writable - * so you don't need a syscall. There may or may not be library - * support. - * - * + Emulation of FMULQ, FDIVQ, FSQRTQ, FDMULQ needs to be - * written! - * - * + reindent code to conform to Linux kernel standard :-> - * - * + work out whether all the compile-time warnings are bogus - * - * + check that conversion to/from integers works - * - * + check with the SPARC architecture manual to see if we resolve - * the implementation-dependent bits of the IEEE spec in the - * same manner as the hardware. - * - * + more test cases for the test script always welcome! - * - * + illegal opcodes currently cause SIGFPEs. We should arrange - * to tell the traps.c code to SIGILL instead. Currently, - * everywhere that we return 0 should cause SIGILL, I think. - * SIGFPE should only be caused if we set an IEEE exception bit - * and the relevant trap bit is also set. (this means that - * traps.c should do this; also it should handle the case of - * IEEE exception generated directly by the hardware.) - * Should illegal_fp_register (which is a flavour of fp exception) - * cause SIGFPE or SIGILL? - * - * + the test script needs to be extended to handle the quadword - * and comparison insns. - * - * + _FP_DIV_MEAT_2_udiv_64() appears to work but it should be - * checked by somebody who understands the algorithm :-> - * - * + fpsave() saves the FP queue but fpload() doesn't reload it. +/* TODO: + * fpsave() saves the FP queue but fpload() doesn't reload it. * Therefore when we context switch or change FPU ownership * we have to check to see if the queue had anything in it and - * emulate it if it did. This is going to be a pain. + * emulate it if it did. This is going to be a pain. */ #include <linux/types.h> @@ -124,38 +69,21 @@ #include <linux/mm.h> #include <asm/uaccess.h> +#include "sfp-util.h" #include "soft-fp.h" #define FLOATFUNC(x) extern int x(void *,void *,void *) -/* Current status: we don't properly emulate the difficult quadword - * insns (MUL, DIV, SQRT). - * There are also some ops involving the FP registers which we don't - * emulate: the branch on FP condition flags and the load/store to - * FP regs or FSR. I'm assuming that these will never generate traps - * (not unreasonable if there's an FPU at all; comments in the NetBSD - * kernel source agree on this point). If we wanted to allow - * purely software-emulation of the FPU with FPU totally disabled - * or non-existent, we'd have to emulate these as well. We'd also - * need to alter the fp_disabled trap handler to call the math-emu - * code appropriately. The structure of do_one_mathemu() is also - * inappropriate for these ops (as it has no way to alter the pc, - * for a start) and it might be better to special-case them in do_mathemu(). - * Oh, and you'd need to alter the traps.c code so it didn't try to - * fpsave() and fpload(). If there's genuinely no FPU then there's - * probably bits of kernel stuff that just won't work anyway... - */ - /* The Vn labels indicate what version of the SPARC architecture gas thinks - * each insn is. This is from the binutils source :-> + * each insn is. This is from the binutils source :-> */ /* quadword instructions */ -FLOATFUNC(FSQRTQ); /* v8 NYI */ +FLOATFUNC(FSQRTQ); /* v8 */ FLOATFUNC(FADDQ); /* v8 */ FLOATFUNC(FSUBQ); /* v8 */ -FLOATFUNC(FMULQ); /* v8 NYI */ -FLOATFUNC(FDIVQ); /* v8 NYI */ -FLOATFUNC(FDMULQ); /* v8 NYI */ +FLOATFUNC(FMULQ); /* v8 */ +FLOATFUNC(FDIVQ); /* v8 */ +FLOATFUNC(FDMULQ); /* v8 */ FLOATFUNC(FQTOS); /* v8 */ FLOATFUNC(FQTOD); /* v8 */ FLOATFUNC(FITOQ); /* v8 */ @@ -197,7 +125,7 @@ FLOATFUNC(FITOD); /* v6 */ #define FSR_CEXC_SHIFT 0UL #define FSR_CEXC_MASK (0x1fUL << FSR_CEXC_SHIFT) -static int do_one_mathemu(u32 insn, unsigned long *fsr, unsigned long *fregs); +static int do_one_mathemu(u32 insn, unsigned long *fsr, unsigned long *fregs); /* Unlike the Sparc64 version (which has a struct fpustate), we * pass the taskstruct corresponding to the task which currently owns the @@ -210,65 +138,65 @@ static int do_one_mathemu(u32 insn, unsigned long *fsr, unsigned long *fregs); */ int do_mathemu(struct pt_regs *regs, struct task_struct *fpt) { - /* regs->pc isn't necessarily the PC at which the offending insn is sitting. - * The FPU maintains a queue of FPops which cause traps. - * When it hits an instruction that requires that the trapped op succeeded - * (usually because it reads a reg. that the trapped op wrote) then it - * causes this exception. We need to emulate all the insns on the queue - * and then allow the op to proceed. - * This code should also handle the case where the trap was precise, - * in which case the queue length is zero and regs->pc points at the - * single FPop to be emulated. (this case is untested, though :->) - * You'll need this case if you want to be able to emulate all FPops - * because the FPU either doesn't exist or has been software-disabled. - * [The UltraSPARC makes FP a precise trap; this isn't as stupid as it - * might sound because the Ultra does funky things with a superscalar - * architecture.] - */ - - /* You wouldn't believe how often I typed 'ftp' when I meant 'fpt' :-> */ + /* regs->pc isn't necessarily the PC at which the offending insn is sitting. + * The FPU maintains a queue of FPops which cause traps. + * When it hits an instruction that requires that the trapped op succeeded + * (usually because it reads a reg. that the trapped op wrote) then it + * causes this exception. We need to emulate all the insns on the queue + * and then allow the op to proceed. + * This code should also handle the case where the trap was precise, + * in which case the queue length is zero and regs->pc points at the + * single FPop to be emulated. (this case is untested, though :->) + * You'll need this case if you want to be able to emulate all FPops + * because the FPU either doesn't exist or has been software-disabled. + * [The UltraSPARC makes FP a precise trap; this isn't as stupid as it + * might sound because the Ultra does funky things with a superscalar + * architecture.] + */ + + /* You wouldn't believe how often I typed 'ftp' when I meant 'fpt' :-> */ - int i; - int retcode = 0; /* assume all succeed */ - unsigned long insn; - -#ifdef DEBUG_MATHEMU - printk("In do_mathemu()... pc is %08lx\n", regs->pc); - printk("fpqdepth is %ld\n",fpt->tss.fpqdepth); - for (i = 0; i < fpt->tss.fpqdepth; i++) - printk("%d: %08lx at %08lx\n",i,fpt->tss.fpqueue[i].insn, (unsigned long)fpt->tss.fpqueue[i].insn_addr); -#endif + int i; + int retcode = 0; /* assume all succeed */ + unsigned long insn; - if (fpt->tss.fpqdepth == 0) { /* no queue, guilty insn is at regs->pc */ -#ifdef DEBUG_MATHEMU - printk("precise trap at %08lx\n", regs->pc); +#ifdef DEBUG_MATHEMU + printk("In do_mathemu()... pc is %08lx\n", regs->pc); + printk("fpqdepth is %ld\n", fpt->tss.fpqdepth); + for (i = 0; i < fpt->tss.fpqdepth; i++) + printk("%d: %08lx at %08lx\n", i, fpt->tss.fpqueue[i].insn, + (unsigned long)fpt->tss.fpqueue[i].insn_addr); #endif - if (!get_user(insn, (u32 *)regs->pc)) { - retcode = do_one_mathemu(insn, &fpt->tss.fsr, fpt->tss.float_regs); - if (retcode) { - /* in this case we need to fix up PC & nPC */ - regs->pc = regs->npc; - regs->npc += 4; - } - } - return retcode; - } - /* Normal case: need to empty the queue... */ - for (i = 0; i < fpt->tss.fpqdepth; i++) - { - retcode = do_one_mathemu(fpt->tss.fpqueue[i].insn, &(fpt->tss.fsr), fpt->tss.float_regs); - if (!retcode) /* insn failed, no point doing any more */ - break; - } - /* Now empty the queue and clear the queue_not_empty flag */ - if(retcode) - fpt->tss.fsr &= ~(0x3000 | FSR_CEXC_MASK); - else - fpt->tss.fsr &= ~0x3000; - fpt->tss.fpqdepth = 0; - - return retcode; + if (fpt->tss.fpqdepth == 0) { /* no queue, guilty insn is at regs->pc */ +#ifdef DEBUG_MATHEMU + printk("precise trap at %08lx\n", regs->pc); +#endif + if (!get_user(insn, (u32 *)regs->pc)) { + retcode = do_one_mathemu(insn, &fpt->tss.fsr, fpt->tss.float_regs); + if (retcode) { + /* in this case we need to fix up PC & nPC */ + regs->pc = regs->npc; + regs->npc += 4; + } + } + return retcode; + } + + /* Normal case: need to empty the queue... */ + for (i = 0; i < fpt->tss.fpqdepth; i++) { + retcode = do_one_mathemu(fpt->tss.fpqueue[i].insn, &(fpt->tss.fsr), fpt->tss.float_regs); + if (!retcode) /* insn failed, no point doing any more */ + break; + } + /* Now empty the queue and clear the queue_not_empty flag */ + if(retcode) + fpt->tss.fsr &= ~(0x3000 | FSR_CEXC_MASK); + else + fpt->tss.fsr &= ~0x3000; + fpt->tss.fpqdepth = 0; + + return retcode; } /* All routines returning an exception to raise should detect @@ -291,46 +219,36 @@ static int record_exception(unsigned long *pfsr, int eflag) if(would_trap != 0) { eflag &= ((fsr & FSR_TEM_MASK) >> FSR_TEM_SHIFT); if((eflag & (eflag - 1)) != 0) { - if(eflag & EFLAG_INVALID) - eflag = EFLAG_INVALID; - else if(eflag & EFLAG_DIVZERO) - eflag = EFLAG_DIVZERO; - else if(eflag & EFLAG_INEXACT) - eflag = EFLAG_INEXACT; + if(eflag & FP_EX_INVALID) + eflag = FP_EX_INVALID; + else if(eflag & FP_EX_OVERFLOW) + eflag = FP_EX_OVERFLOW; + else if(eflag & FP_EX_UNDERFLOW) + eflag = FP_EX_UNDERFLOW; + else if(eflag & FP_EX_DIVZERO) + eflag = FP_EX_DIVZERO; + else if(eflag & FP_EX_INEXACT) + eflag = FP_EX_INEXACT; } } - /* Set CEXC, here are the rules: + /* Set CEXC, here is the rule: * - * 1) In general all FPU ops will set one and only one + * In general all FPU ops will set one and only one * bit in the CEXC field, this is always the case * when the IEEE exception trap is enabled in TEM. - * - * 2) As a special case, if an overflow or underflow - * is being signalled, AND the trap is not enabled - * in TEM, then the inexact field shall also be set. */ fsr &= ~(FSR_CEXC_MASK); - if(would_trap || - (eflag & (EFLAG_OVERFLOW | EFLAG_UNDERFLOW)) == 0) { - fsr |= ((long)eflag << FSR_CEXC_SHIFT); - } else { - fsr |= (((long)eflag << FSR_CEXC_SHIFT) | - (EFLAG_INEXACT << FSR_CEXC_SHIFT)); - } + fsr |= ((long)eflag << FSR_CEXC_SHIFT); - /* Set the AEXC field, rules are: + /* Set the AEXC field, rule is: * - * 1) If a trap would not be generated, the + * If a trap would not be generated, the * CEXC just generated is OR'd into the * existing value of AEXC. - * - * 2) When a trap is generated, AEXC is cleared. */ if(would_trap == 0) fsr |= ((long)eflag << FSR_AEXC_SHIFT); - else - fsr &= ~(FSR_AEXC_MASK); /* If trapping, indicate fault trap type IEEE. */ if(would_trap != 0) @@ -343,157 +261,150 @@ static int record_exception(unsigned long *pfsr, int eflag) static int do_one_mathemu(u32 insn, unsigned long *fsr, unsigned long *fregs) { - /* Emulate the given insn, updating fsr and fregs appropriately. */ - int type = 0; - /* 01 is single, 10 is double, 11 is quad, - * 000011 is rs1, 001100 is rs2, 110000 is rd (00 in rd is fcc) - * 111100000000 tells which ftt that may happen in - * (this field not used on sparc32 code, as we can't - * extract trap type info for ops on the FP queue) - */ - int freg, eflag; - int (*func)(void *,void *,void *) = NULL; - void *rs1 = NULL, *rs2 = NULL, *rd = NULL; + /* Emulate the given insn, updating fsr and fregs appropriately. */ + int type = 0; + /* 01 is single, 10 is double, 11 is quad, + * 000011 is rs1, 001100 is rs2, 110000 is rd (00 in rd is fcc) + * 111100000000 tells which ftt that may happen in + * (this field not used on sparc32 code, as we can't + * extract trap type info for ops on the FP queue) + */ + int freg, eflag; + int (*func)(void *,void *,void *) = NULL; + void *rs1 = NULL, *rs2 = NULL, *rd = NULL; + +#ifdef DEBUG_MATHEMU + printk("In do_mathemu(), emulating %08lx\n", insn); +#endif + if ((insn & 0xc1f80000) == 0x81a00000) /* FPOP1 */ { + switch ((insn >> 5) & 0x1ff) { + /* QUAD - ftt == 3 */ + case 0x001: type = 0x314; func = FMOVS; break; + case 0x005: type = 0x314; func = FNEGS; break; + case 0x009: type = 0x314; func = FABSS; break; + case 0x02b: type = 0x33c; func = FSQRTQ; break; + case 0x043: type = 0x33f; func = FADDQ; break; + case 0x047: type = 0x33f; func = FSUBQ; break; + case 0x04b: type = 0x33f; func = FMULQ; break; + case 0x04f: type = 0x33f; func = FDIVQ; break; + case 0x06e: type = 0x33a; func = FDMULQ; break; + case 0x0c7: type = 0x31c; func = FQTOS; break; + case 0x0cb: type = 0x32c; func = FQTOD; break; + case 0x0cc: type = 0x334; func = FITOQ; break; + case 0x0cd: type = 0x334; func = FSTOQ; break; + case 0x0ce: type = 0x338; func = FDTOQ; break; + case 0x0d3: type = 0x31c; func = FQTOI; break; + /* SUBNORMAL - ftt == 2 */ + case 0x029: type = 0x214; func = FSQRTS; break; + case 0x02a: type = 0x228; func = FSQRTD; break; + case 0x041: type = 0x215; func = FADDS; break; + case 0x042: type = 0x22a; func = FADDD; break; + case 0x045: type = 0x215; func = FSUBS; break; + case 0x046: type = 0x22a; func = FSUBD; break; + case 0x049: type = 0x215; func = FMULS; break; + case 0x04a: type = 0x22a; func = FMULD; break; + case 0x04d: type = 0x215; func = FDIVS; break; + case 0x04e: type = 0x22a; func = FDIVD; break; + case 0x069: type = 0x225; func = FSMULD; break; + case 0x0c6: type = 0x218; func = FDTOS; break; + case 0x0c9: type = 0x224; func = FSTOD; break; + case 0x0d1: type = 0x214; func = FSTOI; break; + case 0x0d2: type = 0x218; func = FDTOI; break; + default: +#ifdef DEBUG_MATHEMU + printk("unknown FPop1: %03lx\n",(insn>>5)&0x1ff); +#endif + } + } else if ((insn & 0xc1f80000) == 0x81a80000) /* FPOP2 */ { + switch ((insn >> 5) & 0x1ff) { + case 0x051: type = 0x305; func = FCMPS; break; + case 0x052: type = 0x30a; func = FCMPD; break; + case 0x053: type = 0x30f; func = FCMPQ; break; + case 0x055: type = 0x305; func = FCMPES; break; + case 0x056: type = 0x30a; func = FCMPED; break; + case 0x057: type = 0x30f; func = FCMPEQ; break; + default: #ifdef DEBUG_MATHEMU - printk("In do_mathemu(), emulating %08lx\n", insn); -#endif - - if ((insn & 0xc1f80000) == 0x81a00000) /* FPOP1 */ { - switch ((insn >> 5) & 0x1ff) { - /* QUAD - ftt == 3 */ - case 0x001: type = 0x314; func = FMOVS; break; - case 0x005: type = 0x314; func = FNEGS; break; - case 0x009: type = 0x314; func = FABSS; break; - case 0x02b: type = 0x33c; func = FSQRTQ; break; - case 0x043: type = 0x33f; func = FADDQ; break; - case 0x047: type = 0x33f; func = FSUBQ; break; - case 0x04b: type = 0x33f; func = FMULQ; break; - case 0x04f: type = 0x33f; func = FDIVQ; break; - case 0x06e: type = 0x33a; func = FDMULQ; break; - case 0x0c7: type = 0x31c; func = FQTOS; break; - case 0x0cb: type = 0x32c; func = FQTOD; break; - case 0x0cc: type = 0x334; func = FITOQ; break; - case 0x0cd: type = 0x334; func = FSTOQ; break; - case 0x0ce: type = 0x338; func = FDTOQ; break; - case 0x0d3: type = 0x31c; func = FQTOI; break; - /* SUBNORMAL - ftt == 2 */ - case 0x029: type = 0x214; func = FSQRTS; break; - case 0x02a: type = 0x228; func = FSQRTD; break; - case 0x041: type = 0x215; func = FADDS; break; - case 0x042: type = 0x22a; func = FADDD; break; - case 0x045: type = 0x215; func = FSUBS; break; - case 0x046: type = 0x22a; func = FSUBD; break; - case 0x049: type = 0x215; func = FMULS; break; - case 0x04a: type = 0x22a; func = FMULD; break; - case 0x04d: type = 0x215; func = FDIVS; break; - case 0x04e: type = 0x22a; func = FDIVD; break; - case 0x069: type = 0x225; func = FSMULD; break; - case 0x0c6: type = 0x218; func = FDTOS; break; - case 0x0c9: type = 0x224; func = FSTOD; break; - case 0x0d1: type = 0x214; func = FSTOI; break; - case 0x0d2: type = 0x218; func = FDTOI; break; - default: -#ifdef DEBUG_MATHEMU - printk("unknown FPop1: %03lx\n",(insn>>5)&0x1ff); -#endif - } - } - else if ((insn & 0xc1f80000) == 0x81a80000) /* FPOP2 */ { - switch ((insn >> 5) & 0x1ff) { - case 0x051: type = 0x305; func = FCMPS; break; - case 0x052: type = 0x30a; func = FCMPD; break; - case 0x053: type = 0x30f; func = FCMPQ; break; - case 0x055: type = 0x305; func = FCMPES; break; - case 0x056: type = 0x30a; func = FCMPED; break; - case 0x057: type = 0x30f; func = FCMPEQ; break; - default: -#ifdef DEBUG_MATHEMU - printk("unknown FPop2: %03lx\n",(insn>>5)&0x1ff); -#endif - } - } - - if (!type) { /* oops, didn't recognise that FPop */ - printk("attempt to emulate unrecognised FPop!\n"); - return 0; - } - - /* Decode the registers to be used */ - freg = (*fsr >> 14) & 0xf; + printk("unknown FPop2: %03lx\n",(insn>>5)&0x1ff); +#endif + } + } + + if (!type) { /* oops, didn't recognise that FPop */ + printk("attempt to emulate unrecognised FPop!\n"); + return 0; + } + + /* Decode the registers to be used */ + freg = (*fsr >> 14) & 0xf; - *fsr &= ~0x1c000; /* clear the traptype bits */ - - freg = ((insn >> 14) & 0x1f); - switch (type & 0x3) /* is rs1 single, double or quad? */ - { - case 3: - if (freg & 3) /* quadwords must have bits 4&5 of the */ - { /* encoded reg. number set to zero. */ - *fsr |= (6 << 14); - return 0; /* simulate invalid_fp_register exception */ - } - /* fall through */ - case 2: - if (freg & 1) /* doublewords must have bit 5 zeroed */ - { - *fsr |= (6 << 14); - return 0; - } - } - rs1 = (void *)&fregs[freg]; - freg = (insn & 0x1f); - switch ((type >> 2) & 0x3) - { /* same again for rs2 */ - case 3: - if (freg & 3) /* quadwords must have bits 4&5 of the */ - { /* encoded reg. number set to zero. */ - *fsr |= (6 << 14); - return 0; /* simulate invalid_fp_register exception */ - } - /* fall through */ - case 2: - if (freg & 1) /* doublewords must have bit 5 zeroed */ - { - *fsr |= (6 << 14); - return 0; - } - } - rs2 = (void *)&fregs[freg]; - freg = ((insn >> 25) & 0x1f); - switch ((type >> 4) & 0x3) /* and finally rd. This one's a bit different */ - { - case 0: /* dest is fcc. (this must be FCMPQ or FCMPEQ) */ - if (freg) /* V8 has only one set of condition codes, so */ - { /* anything but 0 in the rd field is an error */ - *fsr |= (6 << 14); /* (should probably flag as invalid opcode */ - return 0; /* but SIGFPE will do :-> ) */ - } - rd = (void *)(fsr); /* FCMPQ and FCMPEQ are special and only */ - break; /* set bits they're supposed to :-> */ - case 3: - if (freg & 3) /* quadwords must have bits 4&5 of the */ - { /* encoded reg. number set to zero. */ - *fsr |= (6 << 14); - return 0; /* simulate invalid_fp_register exception */ - } - /* fall through */ - case 2: - if (freg & 1) /* doublewords must have bit 5 zeroed */ - { - *fsr |= (6 << 14); - return 0; - } - /* fall through */ - case 1: - rd = (void *)&fregs[freg]; - break; - } -#ifdef DEBUG_MATHEMU - printk("executing insn...\n"); -#endif - eflag = func(rd, rs2, rs1); /* do the Right Thing */ - if(eflag == 0) - return 1; /* success! */ - return record_exception(fsr, eflag); + *fsr &= ~0x1c000; /* clear the traptype bits */ + + freg = ((insn >> 14) & 0x1f); + switch (type & 0x3) { /* is rs1 single, double or quad? */ + case 3: + if (freg & 3) { /* quadwords must have bits 4&5 of the */ + /* encoded reg. number set to zero. */ + *fsr |= (6 << 14); + return 0; /* simulate invalid_fp_register exception */ + } + /* fall through */ + case 2: + if (freg & 1) { /* doublewords must have bit 5 zeroed */ + *fsr |= (6 << 14); + return 0; + } + } + rs1 = (void *)&fregs[freg]; + freg = (insn & 0x1f); + switch ((type >> 2) & 0x3) { /* same again for rs2 */ + case 3: + if (freg & 3) { /* quadwords must have bits 4&5 of the */ + /* encoded reg. number set to zero. */ + *fsr |= (6 << 14); + return 0; /* simulate invalid_fp_register exception */ + } + /* fall through */ + case 2: + if (freg & 1) { /* doublewords must have bit 5 zeroed */ + *fsr |= (6 << 14); + return 0; + } + } + rs2 = (void *)&fregs[freg]; + freg = ((insn >> 25) & 0x1f); + switch ((type >> 4) & 0x3) { /* and finally rd. This one's a bit different */ + case 0: /* dest is fcc. (this must be FCMPQ or FCMPEQ) */ + if (freg) { /* V8 has only one set of condition codes, so */ + /* anything but 0 in the rd field is an error */ + *fsr |= (6 << 14); /* (should probably flag as invalid opcode */ + return 0; /* but SIGFPE will do :-> ) */ + } + rd = (void *)(fsr); /* FCMPQ and FCMPEQ are special and only */ + break; /* set bits they're supposed to :-> */ + case 3: + if (freg & 3) { /* quadwords must have bits 4&5 of the */ + /* encoded reg. number set to zero. */ + *fsr |= (6 << 14); + return 0; /* simulate invalid_fp_register exception */ + } + /* fall through */ + case 2: + if (freg & 1) { /* doublewords must have bit 5 zeroed */ + *fsr |= (6 << 14); + return 0; + } + /* fall through */ + case 1: + rd = (void *)&fregs[freg]; + break; + } +#ifdef DEBUG_MATHEMU + printk("executing insn...\n"); +#endif + eflag = func(rd, rs2, rs1); /* do the Right Thing */ + if(eflag == 0) + return 1; /* success! */ + return record_exception(fsr, eflag); } diff --git a/arch/sparc/math-emu/sfp-machine.h b/arch/sparc/math-emu/sfp-machine.h index 67a74580c..99448502a 100644 --- a/arch/sparc/math-emu/sfp-machine.h +++ b/arch/sparc/math-emu/sfp-machine.h @@ -1,6 +1,11 @@ -/* Machine-dependent software floating-point definitions. Sparc version. - Copyright (C) 1997 Free Software Foundation, Inc. +/* Machine-dependent software floating-point definitions. + Sparc userland (_Q_*) version. + Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com), + Jakub Jelinek (jj@ultra.linux.cz), + David S. Miller (davem@redhat.com) and + Peter Maydell (pmaydell@chiark.greenend.org.uk). The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -15,382 +20,183 @@ You should have received a copy of the GNU Library General Public License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - Actually, this is a sparc (32bit) version, written based on the - i386 and sparc64 versions, by me, - Peter Maydell (pmaydell@chiark.greenend.org.uk). - Comments are by and large also mine, although they may be inaccurate. - - In picking out asm fragments I've gone with the lowest common - denominator, which also happens to be the hardware I have :-> - That is, a SPARC without hardware multiply and divide. - */ - - -/* basic word size definitions */ +#ifndef _SFP_MACHINE_H +#define _SFP_MACHINE_H + #define _FP_W_TYPE_SIZE 32 #define _FP_W_TYPE unsigned long #define _FP_WS_TYPE signed long #define _FP_I_TYPE long -/* You can optionally code some things like addition in asm. For - * example, i386 defines __FP_FRAC_ADD_2 as asm. If you don't - * then you get a fragment of C code [if you change an #ifdef 0 - * in op-2.h] or a call to add_ssaaaa (see below). - * Good places to look for asm fragments to use are gcc and glibc. - * gcc's longlong.h is useful. - */ +#define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_wide(S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_2_wide(D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) _FP_MUL_MEAT_4_wide(Q,R,X,Y,umul_ppmm) -/* We need to know how to multiply and divide. If the host word size - * is >= 2*fracbits you can use FP_MUL_MEAT_n_imm(t,R,X,Y) which - * codes the multiply with whatever gcc does to 'a * b'. - * _FP_MUL_MEAT_n_wide(t,R,X,Y,f) is used when you have an asm - * function that can multiply two 1W values and get a 2W result. - * Otherwise you're stuck with _FP_MUL_MEAT_n_hard(t,R,X,Y) which - * does bitshifting to avoid overflow. - * For division there is FP_DIV_MEAT_n_imm(t,R,X,Y,f) for word size - * >= 2*fracbits, where f is either _FP_DIV_HELP_imm or - * _FP_DIV_HELP_ldiv (see op-1.h). - * _FP_DIV_MEAT_udiv() is if you have asm to do 2W/1W => (1W, 1W). - * [GCC and glibc have longlong.h which has the asm macro udiv_qrnnd - * to do this.] - * In general, 'n' is the number of words required to hold the type, - * and 't' is either S, D or Q for single/double/quad. - * -- PMM - */ -/* Example: SPARC64: - * #define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_imm(S,R,X,Y) - * #define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_1_wide(D,R,X,Y,umul_ppmm) - * #define _FP_MUL_MEAT_Q(R,X,Y) _FP_MUL_MEAT_2_wide(Q,R,X,Y,umul_ppmm) - * - * #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) - * #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv(D,R,X,Y) - * #define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv_64(Q,R,X,Y) - * - * Example: i386: - * #define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_wide(S,R,X,Y,_i386_mul_32_64) - * #define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_2_wide(D,R,X,Y,_i386_mul_32_64) - * - * #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y,_i386_div_64_32) - * #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv_64(D,R,X,Y) - */ -#define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_wide(S,R,X,Y,umul_ppmm) -#define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_2_wide(D,R,X,Y,umul_ppmm) -/* FIXME: This is not implemented, but should be soon */ -#define _FP_MUL_MEAT_Q(R,X,Y) _FP_FRAC_SET_4(R, _FP_ZEROFRAC_4) -#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y) -#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv_64(D,R,X,Y) -/* FIXME: This is not implemented, but should be soon */ -#define _FP_DIV_MEAT_Q(R,X,Y) _FP_FRAC_SET_4(R, _FP_ZEROFRAC_4) +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) -/* These macros define what NaN looks like. They're supposed to expand to - * a comma-separated set of 32bit unsigned ints that encode NaN. - */ -#define _FP_NANFRAC_S _FP_QNANBIT_S -#define _FP_NANFRAC_D _FP_QNANBIT_D, 0 -#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0 +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 #define _FP_KEEPNANFRACP 1 -/* This macro appears to be called when both X and Y are NaNs, and - * has to choose one and copy it to R. i386 goes for the larger of the - * two, sparc64 just picks Y. I don't understand this at all so I'll - * go with sparc64 because it's shorter :-> -- PMM +/* If one NaN is signaling and the other is not, + * we choose that one, otherwise we choose X. + */ +/* For _Qp_* and _Q_*, this should prefer X, for + * CPU instruction emulation this should prefer Y. + * (see SPAMv9 B.2.2 section). */ #define _FP_CHOOSENAN(fs, wc, R, X, Y) \ do { \ - R##_s = Y##_s; \ - _FP_FRAC_COPY_##wc(R,Y); \ + if ((_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + else \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ R##_c = FP_CLS_NAN; \ } while (0) - -#define __FP_UNPACK_RAW_1(fs, X, val) \ - do { \ - union _FP_UNION_##fs *_flo = \ - (union _FP_UNION_##fs *)val; \ - \ - X##_f = _flo->bits.frac; \ - X##_e = _flo->bits.exp; \ - X##_s = _flo->bits.sign; \ - } while (0) - -#define __FP_UNPACK_RAW_2(fs, X, val) \ - do { \ - union _FP_UNION_##fs *_flo = \ - (union _FP_UNION_##fs *)val; \ - \ - X##_f0 = _flo->bits.frac0; \ - X##_f1 = _flo->bits.frac1; \ - X##_e = _flo->bits.exp; \ - X##_s = _flo->bits.sign; \ - } while (0) -#define __FP_UNPACK_RAW_4(fs, X, val) \ - do { \ - union _FP_UNION_##fs *_flo = \ - (union _FP_UNION_##fs *)val; \ - \ - X##_f[0] = _flo->bits.frac0; \ - X##_f[1] = _flo->bits.frac1; \ - X##_f[2] = _flo->bits.frac2; \ - X##_f[3] = _flo->bits.frac3; \ - X##_e = _flo->bits.exp; \ - X##_s = _flo->bits.sign; \ +/* Some assembly to speed things up. */ +#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \ + __asm__ ("addcc %r7,%8,%2 + addxcc %r5,%6,%1 + addx %r3,%4,%0" \ + : "=r" ((USItype)(r2)), \ + "=&r" ((USItype)(r1)), \ + "=&r" ((USItype)(r0)) \ + : "%rJ" ((USItype)(x2)), \ + "rI" ((USItype)(y2)), \ + "%rJ" ((USItype)(x1)), \ + "rI" ((USItype)(y1)), \ + "%rJ" ((USItype)(x0)), \ + "rI" ((USItype)(y0)) \ + : "cc") + +#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \ + __asm__ ("subcc %r7,%8,%2 + subxcc %r5,%6,%1 + subx %r3,%4,%0" \ + : "=r" ((USItype)(r2)), \ + "=&r" ((USItype)(r1)), \ + "=&r" ((USItype)(r0)) \ + : "%rJ" ((USItype)(x2)), \ + "rI" ((USItype)(y2)), \ + "%rJ" ((USItype)(x1)), \ + "rI" ((USItype)(y1)), \ + "%rJ" ((USItype)(x0)), \ + "rI" ((USItype)(y0)) \ + : "cc") + +#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ + do { \ + /* We need to fool gcc, as we need to pass more than 10 \ + input/outputs. */ \ + register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2"); \ + __asm__ __volatile__ (" + addcc %r8,%9,%1 + addxcc %r6,%7,%0 + addxcc %r4,%5,%%g2 + addx %r2,%3,%%g1" \ + : "=&r" ((USItype)(r1)), \ + "=&r" ((USItype)(r0)) \ + : "%rJ" ((USItype)(x3)), \ + "rI" ((USItype)(y3)), \ + "%rJ" ((USItype)(x2)), \ + "rI" ((USItype)(y2)), \ + "%rJ" ((USItype)(x1)), \ + "rI" ((USItype)(y1)), \ + "%rJ" ((USItype)(x0)), \ + "rI" ((USItype)(y0)) \ + : "cc", "g1", "g2"); \ + __asm__ __volatile__ ("" : "=r" (_t1), "=r" (_t2)); \ + r3 = _t1; r2 = _t2; \ } while (0) -#define __FP_UNPACK_S(X,val) \ - do { \ - __FP_UNPACK_RAW_1(S,X,val); \ - _FP_UNPACK_CANONICAL(S,1,X); \ +#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ + do { \ + /* We need to fool gcc, as we need to pass more than 10 \ + input/outputs. */ \ + register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2"); \ + __asm__ __volatile__ (" + subcc %r8,%9,%1 + subxcc %r6,%7,%0 + subxcc %r4,%5,%%g2 + subx %r2,%3,%%g1" \ + : "=&r" ((USItype)(r1)), \ + "=&r" ((USItype)(r0)) \ + : "%rJ" ((USItype)(x3)), \ + "rI" ((USItype)(y3)), \ + "%rJ" ((USItype)(x2)), \ + "rI" ((USItype)(y2)), \ + "%rJ" ((USItype)(x1)), \ + "rI" ((USItype)(y1)), \ + "%rJ" ((USItype)(x0)), \ + "rI" ((USItype)(y0)) \ + : "cc", "g1", "g2"); \ + __asm__ __volatile__ ("" : "=r" (_t1), "=r" (_t2)); \ + r3 = _t1; r2 = _t2; \ } while (0) -#define __FP_UNPACK_D(X,val) \ - do { \ - __FP_UNPACK_RAW_2(D,X,val); \ - _FP_UNPACK_CANONICAL(D,2,X); \ - } while (0) - -#define __FP_UNPACK_Q(X,val) \ - do { \ - __FP_UNPACK_RAW_4(Q,X,val); \ - _FP_UNPACK_CANONICAL(Q,4,X); \ - } while (0) - -#define __FP_PACK_RAW_1(fs, val, X) \ - do { \ - union _FP_UNION_##fs *_flo = \ - (union _FP_UNION_##fs *)val; \ - \ - _flo->bits.frac = X##_f; \ - _flo->bits.exp = X##_e; \ - _flo->bits.sign = X##_s; \ - } while (0) - -#define __FP_PACK_RAW_2(fs, val, X) \ - do { \ - union _FP_UNION_##fs *_flo = \ - (union _FP_UNION_##fs *)val; \ - \ - _flo->bits.frac0 = X##_f0; \ - _flo->bits.frac1 = X##_f1; \ - _flo->bits.exp = X##_e; \ - _flo->bits.sign = X##_s; \ - } while (0) - -#define __FP_PACK_RAW_4(fs, val, X) \ - do { \ - union _FP_UNION_##fs *_flo = \ - (union _FP_UNION_##fs *)val; \ - \ - _flo->bits.frac0 = X##_f[0]; \ - _flo->bits.frac1 = X##_f[1]; \ - _flo->bits.frac2 = X##_f[2]; \ - _flo->bits.frac3 = X##_f[3]; \ - _flo->bits.exp = X##_e; \ - _flo->bits.sign = X##_s; \ - } while (0) - -#include <linux/kernel.h> -#include <linux/sched.h> - -/* We only actually write to the destination register - * if exceptions signalled (if any) will not trap. - */ -#ifdef __SMP__ -#define __FPU_TEM \ - (((current->tss.fsr)>>23)&0x1f) -#else +#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0) + +#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) __FP_FRAC_SUB_4(x3,x2,x1,x0,x3,x2,x1,x0,y3,y2,y1,y0) + +#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \ + __asm__ ("addcc %3,%4,%3 + addxcc %2,%%g0,%2 + addxcc %1,%%g0,%1 + addx %0,%%g0,%0" \ + : "=&r" ((USItype)(x3)), \ + "=&r" ((USItype)(x2)), \ + "=&r" ((USItype)(x1)), \ + "=&r" ((USItype)(x0)) \ + : "rI" ((USItype)(i)), \ + "0" ((USItype)(x3)), \ + "1" ((USItype)(x2)), \ + "2" ((USItype)(x1)), \ + "3" ((USItype)(x0)) \ + : "cc") + +#ifndef __SMP__ extern struct task_struct *last_task_used_math; -#define __FPU_TEM \ - (((last_task_used_math->tss.fsr)>>23)&0x1f) #endif -#define __FPU_TRAP_P(bits) \ - ((__FPU_TEM & (bits)) != 0) - -#define __FP_PACK_S(val,X) \ -({ int __exc = _FP_PACK_CANONICAL(S,1,X); \ - if(!__exc || !__FPU_TRAP_P(__exc)) \ - __FP_PACK_RAW_1(S,val,X); \ - __exc; \ -}) - -#define __FP_PACK_D(val,X) \ -({ int __exc = _FP_PACK_CANONICAL(D,2,X); \ - if(!__exc || !__FPU_TRAP_P(__exc)) \ - __FP_PACK_RAW_2(D,val,X); \ - __exc; \ -}) - -#define __FP_PACK_Q(val,X) \ -({ int __exc = _FP_PACK_CANONICAL(Q,4,X); \ - if(!__exc || !__FPU_TRAP_P(__exc)) \ - __FP_PACK_RAW_4(Q,val,X); \ - __exc; \ -}) /* Obtain the current rounding mode. */ +#ifndef FP_ROUNDMODE #ifdef __SMP__ #define FP_ROUNDMODE ((current->tss.fsr >> 30) & 0x3) #else #define FP_ROUNDMODE ((last_task_used_math->tss.fsr >> 30) & 0x3) #endif +#endif -/* the asm fragments go here: all these are taken from glibc-2.0.5's stdlib/longlong.h */ - -#include <linux/types.h> -#include <asm/byteorder.h> - -/* add_ssaaaa is used in op-2.h and should be equivalent to - * #define add_ssaaaa(sh,sl,ah,al,bh,bl) (sh = ah+bh+ (( sl = al+bl) < al)) - * add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, - * high_addend_2, low_addend_2) adds two UWtype integers, composed by - * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 - * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow - * (i.e. carry out) is not stored anywhere, and is lost. - */ -#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ - __asm__ ("addcc %r4,%5,%1 - addx %r2,%3,%0" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "%rJ" ((USItype)(ah)), \ - "rI" ((USItype)(bh)), \ - "%rJ" ((USItype)(al)), \ - "rI" ((USItype)(bl)) \ - : "cc") - - -/* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to - * #define sub_ddmmss(sh, sl, ah, al, bh, bl) (sh = ah-bh - ((sl = al-bl) > al)) - * sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, - * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, - * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and - * LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE - * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, - * and is lost. - */ - -#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ - __asm__ ("subcc %r4,%5,%1 - subx %r2,%3,%0" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "rJ" ((USItype)(ah)), \ - "rI" ((USItype)(bh)), \ - "rJ" ((USItype)(al)), \ - "rI" ((USItype)(bl)) \ - : "cc") - - -/* asm fragments for mul and div */ -/* umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two - * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype - * word product in HIGH_PROD and LOW_PROD. - * These look ugly because the sun4/4c don't have umul/udiv/smul/sdiv in - * hardware. - */ -#define umul_ppmm(w1, w0, u, v) \ - __asm__ ("! Inlined umul_ppmm - wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr - sra %3,31,%%g2 ! Don't move this insn - and %2,%%g2,%%g2 ! Don't move this insn - andcc %%g0,0,%%g1 ! Don't move this insn - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,%3,%%g1 - mulscc %%g1,0,%%g1 - add %%g1,%%g2,%0 - rd %%y,%1" \ - : "=r" ((USItype)(w1)), \ - "=r" ((USItype)(w0)) \ - : "%rI" ((USItype)(u)), \ - "r" ((USItype)(v)) \ - : "%g1", "%g2", "cc") - -/* udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, - * denominator) divides a UDWtype, composed by the UWtype integers - * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient - * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less - * than DENOMINATOR for correct operation. If, in addition, the most - * significant bit of DENOMINATOR must be 1, then the pre-processor symbol - * UDIV_NEEDS_NORMALIZATION is defined to 1. - */ - -#define udiv_qrnnd(q, r, n1, n0, d) \ - __asm__ ("! Inlined udiv_qrnnd - mov 32,%%g1 - subcc %1,%2,%%g0 -1: bcs 5f - addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb - sub %1,%2,%1 ! this kills msb of n - addx %1,%1,%1 ! so this can't give carry - subcc %%g1,1,%%g1 -2: bne 1b - subcc %1,%2,%%g0 - bcs 3f - addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb - b 3f - sub %1,%2,%1 ! this kills msb of n -4: sub %1,%2,%1 -5: addxcc %1,%1,%1 - bcc 2b - subcc %%g1,1,%%g1 -! Got carry from n. Subtract next step to cancel this carry. - bne 4b - addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb - sub %1,%2,%1 -3: xnor %0,0,%0 - ! End of inline udiv_qrnnd" \ - : "=&r" ((USItype) (q)), \ - "=&r" ((USItype) (r)) \ - : "r" ((USItype) (d)), \ - "1" ((USItype) (n1)), \ - "0" ((USItype) (n0)) : "%g1", "cc") - -#define UDIV_NEEDS_NORMALIZATION 0 +/* Exception flags. */ +#define FP_EX_INVALID (1 << 4) +#define FP_EX_OVERFLOW (1 << 3) +#define FP_EX_UNDERFLOW (1 << 2) +#define FP_EX_DIVZERO (1 << 1) +#define FP_EX_INEXACT (1 << 0) -#define abort() \ - return 0 +#define FP_HANDLE_EXCEPTIONS return _fex -#ifdef __BIG_ENDIAN -#define __BYTE_ORDER __BIG_ENDIAN +#ifdef __SMP__ +#define FP_INHIBIT_RESULTS ((current->tss.fsr >> 23) & _fex) #else -#define __BYTE_ORDER __LITTLE_ENDIAN +#define FP_INHIBIT_RESULTS ((last_task_used_math->tss.fsr >> 23) & _fex) #endif -/* Exception flags. */ -#define EFLAG_INVALID (1 << 4) -#define EFLAG_OVERFLOW (1 << 3) -#define EFLAG_UNDERFLOW (1 << 2) -#define EFLAG_DIVZERO (1 << 1) -#define EFLAG_INEXACT (1 << 0) +#endif diff --git a/arch/sparc/math-emu/sfp-util.h b/arch/sparc/math-emu/sfp-util.h new file mode 100644 index 000000000..75ec69124 --- /dev/null +++ b/arch/sparc/math-emu/sfp-util.h @@ -0,0 +1,115 @@ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <asm/byteorder.h> + +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addcc %r4,%5,%1 + addx %r2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + : "cc") +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subcc %r4,%5,%1 + subx %r2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + : "cc") + +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("! Inlined umul_ppmm + wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr + sra %3,31,%%g2 ! Don't move this insn + and %2,%%g2,%%g2 ! Don't move this insn + andcc %%g0,0,%%g1 ! Don't move this insn + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,0,%%g1 + add %%g1,%%g2,%0 + rd %%y,%1" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "%rI" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "%g1", "%g2", "cc") + +/* It's quite necessary to add this much assembler for the sparc. + The default udiv_qrnnd (in C) is more than 10 times slower! */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd + mov 32,%%g1 + subcc %1,%2,%%g0 +1: bcs 5f + addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb + sub %1,%2,%1 ! this kills msb of n + addx %1,%1,%1 ! so this can't give carry + subcc %%g1,1,%%g1 +2: bne 1b + subcc %1,%2,%%g0 + bcs 3f + addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb + b 3f + sub %1,%2,%1 ! this kills msb of n +4: sub %1,%2,%1 +5: addxcc %1,%1,%1 + bcc 2b + subcc %%g1,1,%%g1 +! Got carry from n. Subtract next step to cancel this carry. + bne 4b + addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb + sub %1,%2,%1 +3: xnor %0,0,%0 + ! End of inline udiv_qrnnd" \ + : "=&r" ((USItype)(q)), \ + "=&r" ((USItype)(r)) \ + : "r" ((USItype)(d)), \ + "1" ((USItype)(n1)), \ + "0" ((USItype)(n0)) : "%g1", "cc") +#define UDIV_NEEDS_NORMALIZATION 0 + +#define abort() \ + return 0 + +#ifdef __BIG_ENDIAN +#define __BYTE_ORDER __BIG_ENDIAN +#else +#define __BYTE_ORDER __LITTLE_ENDIAN +#endif |