Import of Linux/MIPS 1.3.0

author: Ralf Baechle <ralf@linux-mips.org> 1995-11-14 08:00:00 +0000
committer: <ralf@linux-mips.org> 1995-11-14 08:00:00 +0000
commit: e7c2a72e2680827d6a733931273a93461c0d8d1b (patch)
tree: c9abeda78ef7504062bb2e816bcf3e3c9d680112 /arch/i386/math-emu
parent: ec6044459060a8c9ce7f64405c465d141898548c (diff)
45 files changed, 12946 insertions, 0 deletions
diff --git a/arch/i386/math-emu/Makefile b/arch/i386/math-emu/Makefile
new file mode 100644
index 000000000..2d391a9e6
--- /dev/null
+++ b/arch/i386/math-emu/Makefile
@@ -0,0 +1,52 @@
+# Makefile for wm-FPU-emu: archives the emulator objects (OBJS) into math.a.
+#
+# DEBUG is empty by default; set it to -DDEBUGGING to add that define to the
+# C compiles.  PARANOID is passed to both the .c and the .S compiles.
+DEBUG	=
+PARANOID = -DPARANOID
+CFLAGS	:= $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin
+
+# MATH_EMULATION is not set here; presumably the invoking make provides it.
+.c.o:
+	$(CC) $(CFLAGS) $(MATH_EMULATION) -c $<
+
+.S.o:
+	$(CC) -D__ASSEMBLER__ $(PARANOID) -c $<
+
+.s.o:
+	$(CC) -c $<
+
+OBJS =	fpu_entry.o div_small.o errors.o \
+	fpu_arith.o fpu_aux.o fpu_etc.o fpu_trig.o \
+	load_store.o get_address.o \
+	poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \
+	reg_add_sub.o reg_compare.o reg_constant.o reg_ld_str.o \
+	reg_div.o reg_mul.o reg_norm.o \
+	reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \
+	reg_round.o \
+	wm_shrx.o wm_sqrt.o \
+	div_Xsig.o polynom_Xsig.o round_Xsig.o \
+	shr_Xsig.o mul_Xsig.o
+
+# Remove any old archive first, then create a fresh one from OBJS.
+math.a: $(OBJS)
+	rm -f $@
+	$(AR) rcs $@ $(OBJS)
+	sync
+
+# Regenerate .depend from every .c and .S file in this directory.
+dep:
+	$(CPP) -M *.c > .depend
+	$(CPP) -D__ASSEMBLER__ -M *.S >> .depend
+
+proto:
+	cproto -e -DMAKING_PROTO *.c >fpu_proto.h
+
+# These goals name commands, not files; modules and dummy do nothing here.
+.PHONY: dep proto modules dummy
+modules dummy:
+
+# include a dependency file if one exists
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/arch/i386/math-emu/README b/arch/i386/math-emu/README
new file mode 100644
index 000000000..2c0acb423
--- /dev/null
+++ b/arch/i386/math-emu/README
@@ -0,0 +1,436 @@
+ +---------------------------------------------------------------------------+
+ |  wm-FPU-emu   an FPU emulator for 80386 and 80486SX microprocessors.      |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |    This program is free software; you can redistribute it and/or modify   |
+ |    it under the terms of the GNU General Public License version 2 as      |
+ |    published by the Free Software Foundation.                             |
+ |                                                                           |
+ |    This program is distributed in the hope that it will be useful,        |
+ |    but WITHOUT ANY WARRANTY; without even the implied warranty of         |
+ |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          |
+ |    GNU General Public License for more details.                           |
+ |                                                                           |
+ |    You should have received a copy of the GNU General Public License      |
+ |    along with this program; if not, write to the Free Software            |
+ |    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              |
+ |                                                                           |
+ +---------------------------------------------------------------------------+
+
+
+
+wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387
+which was my 80387 emulator for early versions of djgpp (gcc under
+msdos); wm-emu387 was in turn based upon emu387 which was written by
+DJ Delorie for djgpp.  The interface to the Linux kernel is based upon
+the original Linux math emulator by Linus Torvalds.
+
+My target FPU for wm-FPU-emu is that described in the Intel486
+Programmer's Reference Manual (1992 edition). Unfortunately, numerous
+facets of the functioning of the FPU are not well covered in the
+Reference Manual. The information in the manual has been supplemented
+with measurements on real 80486's. Unfortunately, it is simply not
+possible to be sure that all of the peculiarities of the 80486 have
+been discovered, so there are always likely to be obscure differences
+in the detailed behaviour of the emulator and a real 80486.
+
+wm-FPU-emu does not implement all of the behaviour of the 80486 FPU,
+but is very close.  See "Limitations" later in this file for a list of
+some differences.
+
+Please report bugs, etc to me at:
+       billm@vaxc.cc.monash.edu.au
+  or at:
+       billm@jacobi.maths.monash.edu.au
+
+
+--Bill Metzenthen
+  August 1994
+
+
+----------------------- Internals of wm-FPU-emu -----------------------
+
+Numeric algorithms:
+(1) Add, subtract, and multiply. Nothing remarkable in these.
+(2) Divide has been tuned to get reasonable performance. The algorithm
+    is not the obvious one which most people seem to use, but is designed
+    to take advantage of the characteristics of the 80386. I expect that
+    it has been invented many times before I discovered it, but I have not
+    seen it. It is based upon one of those ideas which one carries around
+    for years without ever bothering to check it out.
+(3) The sqrt function has been tuned to get good performance. It is based
+    upon Newton's classic method. Performance was improved by capitalizing
+    upon the properties of Newton's method, and the code is once again
+    structured taking account of the 80386 characteristics.
+(4) The trig, log, and exp functions are based in each case upon quasi-
+    "optimal" polynomial approximations. My definition of "optimal" was
+    based upon getting good accuracy with reasonable speed.
+(5) The argument reducing code for the trig function effectively uses
+    a value of pi which is accurate to more than 128 bits. As a consequence,
+    the reduced argument is accurate to more than 64 bits for arguments up
+    to a few pi, and accurate to more than 64 bits for most arguments,
+    even for arguments approaching 2^63. This is far superior to an
+    80486, which uses a value of pi which is accurate to 66 bits.
+
+The code of the emulator is complicated slightly by the need to
+account for a limited form of re-entrancy. Normally, the emulator will
+emulate each FPU instruction to completion without interruption.
+However, it may happen that when the emulator is accessing the user
+memory space, swapping may be needed. In this case the emulator may be
+temporarily suspended while disk i/o takes place. During this time
+another process may use the emulator, thereby perhaps changing static
+variables. The code which accesses user memory is confined to five
+files:
+    fpu_entry.c
+    reg_ld_str.c
+    load_store.c
+    get_address.c
+    errors.c
+As from version 1.12 of the emulator, no static variables are used
+(apart from those in the kernel's per-process tables). The emulator is
+therefore now fully re-entrant, rather than having just the restricted
+form of re-entrancy which is required by the Linux kernel.
+
+----------------------- Limitations of wm-FPU-emu -----------------------
+
+There are a number of differences between the current wm-FPU-emu
+(version 1.20) and the 80486 FPU (apart from bugs). Some of the more
+important differences are listed below:
+
+The Roundup flag does not have much meaning for the transcendental
+functions and its 80486 value with these functions is likely to differ
+from its emulator value.
+
+In a few rare cases the Underflow flag obtained with the emulator will
+be different from that obtained with an 80486. This occurs when the
+following conditions apply simultaneously:
+(a) the operands have a higher precision than the current setting of the
+    precision control (PC) flags.
+(b) the underflow exception is masked.
+(c) the magnitude of the exact result (before rounding) is less than 2^-16382.
+(d) the magnitude of the final result (after rounding) is exactly 2^-16382.
+(e) the magnitude of the exact result would be exactly 2^-16382 if the
+    operands were rounded to the current precision before the arithmetic
+    operation was performed.
+If all of these apply, the emulator will set the Underflow flag but a real
+80486 will not.
+
+NOTE: Certain formats of Extended Real are UNSUPPORTED. They are
+unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities,
+and Unnormals. None of these will be generated by an 80486 or by the
+emulator. Do not use them. The emulator treats them differently in
+detail from the way an 80486 does.
+
+The emulator treats PseudoDenormals differently from an 80486. These
+numbers are in fact properly normalised numbers with the exponent
+offset by 1, and the emulator treats them as such. Unlike the 80486,
+the emulator does not generate a Denormal Operand exception for these
+numbers. The arithmetical results produced when using such a number as
+an operand are the same for the emulator and a real 80486 (apart from
+any slight precision difference for the transcendental functions).
+Neither the emulator nor an 80486 produces one of these numbers as the
+result of any arithmetic operation. An 80486 can keep one of these
+numbers in an FPU register with its identity as a PseudoDenormal, but
+the emulator will not; they are always converted to a valid number.
+
+Self modifying code can cause the emulator to fail. An example of such
+code is:
+          movl %esp,[%ebx]
+	  fld1
+The FPU instruction may be (usually will be) loaded into the pre-fetch
+queue of the cpu before the mov instruction is executed. If the
+destination of the 'movl' overlaps the FPU instruction then the bytes
+in the prefetch queue and memory will be inconsistent when the FPU
+instruction is executed. The emulator will be invoked but will not be
+able to find the instruction which caused the device-not-present
+exception. For this case, the emulator cannot emulate the behaviour of
+an 80486DX.
+
+Handling of the address size override prefix byte (0x67) has not been
+extensively tested yet. A major problem exists because using it in
+vm86 mode can cause a general protection fault. Address offsets
+greater than 0xffff appear to be illegal in vm86 mode but are quite
+acceptable (and work) in real mode. A small test program developed to
+check the addressing, and which runs successfully in real mode,
+crashes dosemu under Linux and also brings Windows down with a general
+protection fault message when run under the MS-DOS prompt of Windows
+3.1. (The program simply reads data from a valid address).
+
+The emulator supports 16-bit protected mode, with one difference from
+an 80486DX.  An 80486DX will allow some floating point instructions to
+write a few bytes below the lowest address of the stack.  The emulator
+will not allow this in 16-bit protected mode: no instructions are
+allowed to write outside the bounds set by the protection.
+
+----------------------- Performance of wm-FPU-emu -----------------------
+
+Speed.
+-----
+
+The speed of floating point computation with the emulator will depend
+upon instruction mix. Relative performance is best for the instructions
+which require most computation. The simple instructions are adversely
+affected by the fpu instruction trap overhead.
+
+
+Timing: Some simple timing tests have been made on the emulator functions.
+The times include load/store instructions. All times are in microseconds
+measured on a 33MHz 386 with 64k cache. The Turbo C tests were under
+ms-dos, the next two columns are for emulators running with the djgpp
+ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97,
+using libm4.0 (hard).
+
+function      Turbo C        djgpp 1.06        WM-emu387     wm-FPU-emu
+
+   +          60.5           154.8              76.5          139.4
+   -          61.1-65.5      157.3-160.8        76.2-79.5     142.9-144.7
+   *          71.0           190.8              79.6          146.6
+   /          61.2-75.0      261.4-266.9        75.3-91.6     142.2-158.1
+
+ sin()        310.8          4692.0            319.0          398.5
+ cos()        284.4          4855.2            308.0          388.7
+ tan()        495.0          8807.1            394.9          504.7
+ atan()       328.9          4866.4            601.1          419.5-491.9
+
+ sqrt()       128.7          crashed           145.2          227.0
+ log()        413.1-419.1    5103.4-5354.21    254.7-282.2    409.4-437.1
+ exp()        479.1          6619.2            469.1          850.8
+
+
+The performance under Linux is improved by the use of look-ahead code.
+The following results show the improvement which is obtained under
+Linux due to the look-ahead code. Also given are the times for the
+original Linux emulator with the 4.1 'soft' lib.
+
+ [ Linus' note: I changed look-ahead to be the default under linux, as
+   there was no reason not to use it after I had edited it to be
+   disabled during tracing ]
+
+            wm-FPU-emu w     original w
+            look-ahead       'soft' lib
+   +         106.4             190.2
+   -         108.6-111.6      192.4-216.2
+   *         113.4             193.1
+   /         108.8-124.4      700.1-706.2
+
+ sin()       390.5            2642.0
+ cos()       381.5            2767.4
+ tan()       496.5            3153.3
+ atan()      367.2-435.5     2439.4-3396.8
+
+ sqrt()      195.1            4732.5
+ log()       358.0-387.5     3359.2-3390.3
+ exp()       619.3            4046.4
+
+
+These figures are now somewhat out-of-date. The emulator has become
+progressively slower for most functions as more of the 80486 features
+have been implemented.
+
+
+----------------------- Accuracy of wm-FPU-emu -----------------------
+
+
+The accuracy of the emulator is in almost all cases equal to or better
+than that of an Intel 80486 FPU.
+
+The results of the basic arithmetic functions (+,-,*,/), and fsqrt
+match those of an 80486 FPU. They are the best possible; the error for
+these never exceeds 1/2 an lsb. The fprem and fprem1 instructions
+return exact results; they have no error.
+
+
+The following table compares the emulator accuracy for the sqrt(),
+trig and log functions against the Turbo C "emulator". For this table,
+each function was tested at about 400 points. Ideal worst-case results
+would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for
+arguments greater than pi/4 can be thought of as being related to the
+precision of the argument x; e.g. an argument of pi/2-(1e-10) which is
+accurate to 64 bits can result in a relative accuracy in cos() of
+about 64 + log2(cos(x)) = 31 bits.
+
+
+Function      Tested x range            Worst result                Turbo C
+                                        (relative bits)
+
+sqrt(x)       1 .. 2                    64.1                         63.2
+atan(x)       1e-10 .. 200              64.2                         62.8
+cos(x)        0 .. pi/2-(1e-10)         64.4 (x <= pi/4)             62.4
+                                        64.1 (x = pi/2-(1e-10))      31.9
+sin(x)        1e-10 .. pi/2             64.0                         62.8
+tan(x)        1e-10 .. pi/2-(1e-10)     64.0 (x <= pi/4)             62.1
+                                        64.1 (x = pi/2-(1e-10))      31.9
+exp(x)        0 .. 1                    63.1 **                      62.9
+log(x)        1+1e-6 .. 2               63.8 **                      62.1
+
+** The accuracy for exp() and log() is low because the FPU (emulator)
+does not compute them directly; two operations are required.
+
+
+The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or
+later) for 'float' variables (24 bit precision numbers) when precision
+control is set to 24, 53 or 64 bits, and for 'double' variables (53
+bit precision numbers) when precision control is set to 53 bits (a
+properly performing FPU cannot pass the 'paranoia' tests for 'double'
+variables when precision control is set to 64 bits).
+
+The code for reducing the argument for the trig functions (fsin, fcos,
+fptan and fsincos) has been improved and now effectively uses a value
+for pi which is accurate to more than 128 bits precision. As a
+consequence, the accuracy of these functions for large arguments has
+been dramatically improved (and is now very much better than an 80486
+FPU). There is also now no degradation of accuracy for fcos and fptan
+for operands close to pi/2. Measured results are (note that the
+definition of accuracy has changed slightly from that used for the
+above table):
+
+Function      Tested x range          Worst result
+                                     (absolute bits)
+
+cos(x)        0 .. 9.22e+18              62.0
+sin(x)        1e-16 .. 9.22e+18          62.1
+tan(x)        1e-16 .. 9.22e+18          61.8
+
+It is possible with some effort to find very large arguments which
+give much degraded precision. For example, the integer number
+           8227740058411162616.0
+is within about 10e-7 of a multiple of pi. To find the tan (for
+example) of this number to 64 bits precision it would be necessary to
+have a value of pi which had about 150 bits precision. The FPU
+emulator computes the result to about 42.6 bits precision (the correct
+result is about -9.739715e-8). On the other hand, an 80486 FPU returns
+0.01059, which in relative terms is hopelessly inaccurate.
+
+For arguments close to critical angles (which occur at multiples of
+pi/2) the emulator is more accurate than an 80486 FPU. For very large
+arguments, the emulator is far more accurate.
+
+
+Prior to version 1.20 of the emulator, the accuracy of the results for
+the transcendental functions (in their principal range) was not as
+good as the results from an 80486 FPU. From version 1.20, the accuracy
+has been considerably improved and these functions now give measured
+worst-case results which are better than the worst-case results given
+by an 80486 FPU.
+
+The following table gives the measured results for the emulator. The
+number of randomly selected arguments in each case is about half a
+million.  The group of three columns gives the frequency of the given
+accuracy in number of times per million, thus the second of these
+columns shows that an accuracy of between 63.80 and 63.89 bits was
+found at a rate of 133 times per one million measurements for fsin.
+The results show that the fsin, fcos and fptan instructions return
+results which are in error (i.e. less accurate than the best possible
+result (which is 64 bits)) for about one per cent of all arguments
+between -pi/2 and +pi/2.  The other instructions have a lower
+frequency of results which are in error.  The last two columns give
+the worst accuracy which was found (in bits) and the approximate value
+of the argument which produced it.
+
+                                frequency (per M)
+                               -------------------   ---------------
+instr   arg range    # tests   63.7   63.8    63.9   worst   at arg
+                               bits   bits    bits    bits
+-----  ------------  -------   ----   ----   -----   -----  --------
+fsin     (0,pi/2)     547756      0    133   10673   63.89  0.451317
+fcos     (0,pi/2)     547563      0    126   10532   63.85  0.700801
+fptan    (0,pi/2)     536274     11    267   10059   63.74  0.784876
+fpatan  4 quadrants   517087      0      8    1855   63.88  0.435121 (4q)
+fyl2x     (0,20)      541861      0      0    1323   63.94  1.40923  (x)
+fyl2xp1 (-.293,.414)  520256      0      0    5678   63.93  0.408542 (x)
+f2xm1     (-1,1)      538847      4    481    6488   63.79  0.167709
+
+
+Tests performed on an 80486 FPU showed results of lower accuracy. The
+following table gives the results which were obtained with an AMD
+486DX2/66 (other tests indicate that an Intel 486DX produces
+identical results).  The tests were basically the same as those used
+to measure the emulator (the values, being random, were in general not
+the same).  The total number of tests for each instruction is given
+at the end of the table; in each case about 100k tests were performed.
+Another line of figures at the end of the table shows that most of the
+instructions return results which are in error for more than 10
+percent of the arguments tested.
+
+The numbers in the body of the table give the approx number of times a
+result of the given accuracy in bits (given in the left-most column)
+was obtained per one million arguments. For three of the instructions,
+two columns of results are given: * The second column for f2xm1 gives
+the number of cases where the results of the first column were for a
+positive argument, this shows that this instruction gives better
+results for positive arguments than it does for negative.  * In the
+cases of fcos and fptan, the first column gives the results when all
+cases where arguments greater than 1.5 were removed from the results
+given in the second column. Unlike the emulator, an 80486 FPU returns
+results of relatively poor accuracy for these instructions when the
+argument approaches pi/2. The table does not show those cases when the
+accuracy of the results was less than 62 bits, which occurs quite
+often for fsin and fptan when the argument approaches pi/2. This poor
+accuracy is discussed above in relation to the Turbo C "emulator", and
+the accuracy of the value of pi.
+
+
+bits   f2xm1  f2xm1 fpatan   fcos   fcos  fyl2x fyl2xp1  fsin  fptan  fptan
+62.0       0      0      0      0    437      0      0      0      0    925
+62.1       0      0     10      0    894      0      0      0      0   1023
+62.2      14      0      0      0   1033      0      0      0      0    945
+62.3      57      0      0      0   1202      0      0      0      0   1023
+62.4     385      0      0     10   1292      0     23      0      0   1178
+62.5    1140      0      0    119   1649      0     39      0      0   1149
+62.6    2037      0      0    189   1620      0     16      0      0   1169
+62.7    5086     14      0    646   2315     10    101     35     39   1402
+62.8    8818     86      0    984   3050     59    287    131    224   2036
+62.9   11340   1355      0   2126   4153     79    605    357    321   1948
+63.0   15557   4750      0   3319   5376    246   1281    862    808   2688
+63.1   20016   8288      0   4620   6628    511   2569   1723   1510   3302
+63.2   24945  11127     10   6588   8098   1120   4470   2968   2990   4724
+63.3   25686  12382     69   8774  10682   1906   6775   4482   5474   7236
+63.4   29219  14722     79  11109  12311   3094   9414   7259   8912  10587
+63.5   30458  14936    393  13802  15014   5874  12666   9609  13762  15262
+63.6   32439  16448   1277  17945  19028  10226  15537  14657  19158  20346
+63.7   35031  16805   4067  23003  23947  18910  20116  21333  25001  26209
+63.8   33251  15820   7673  24781  25675  24617  25354  24440  29433  30329
+63.9   33293  16833  18529  28318  29233  31267  31470  27748  29676  30601
+
+Per cent with error:
+        30.9           3.2          18.5    9.8   13.1   11.6          17.4
+Total arguments tested:
+       70194  70099 101784 100641 100641 101799 128853 114893 102675 102675
+
+
+------------------------- Contributors -------------------------------
+
+A number of people have contributed to the development of the
+emulator, often by just reporting bugs, sometimes with suggested
+fixes, and a few kind people have provided me with access in one way
+or another to an 80486 machine. Contributors include (to those people
+who I may have forgotten, please forgive me):
+
+Linus Torvalds
+Tommy.Thorn@daimi.aau.dk
+Andrew.Tridgell@anu.edu.au
+Nick Holloway, alfie@dcs.warwick.ac.uk
+Hermano Moura, moura@dcs.gla.ac.uk
+Jon Jagger, J.Jagger@scp.ac.uk
+Lennart Benschop
+Brian Gallew, geek+@CMU.EDU
+Thomas Staniszewski, ts3v+@andrew.cmu.edu
+Martin Howell, mph@plasma.apana.org.au
+M Saggaf, alsaggaf@athena.mit.edu
+Peter Barker, PETER@socpsy.sci.fau.edu
+tom@vlsivie.tuwien.ac.at
+Dan Russel, russed@rpi.edu
+Daniel Carosone, danielce@ee.mu.oz.au
+cae@jpmorgan.com
+Hamish Coleman, t933093@minyos.xx.rmit.oz.au
+Bruce Evans, bde@kralizec.zeta.org.au
+Timo Korvola, Timo.Korvola@hut.fi
+Rick Lyons, rick@razorback.brisnet.org.au
+Rick, jrs@world.std.com
+ 
+...and numerous others who responded to my request for help with
+a real 80486.
+
diff --git a/arch/i386/math-emu/control_w.h b/arch/i386/math-emu/control_w.h
new file mode 100644
index 000000000..ef5fced39
--- /dev/null
+++ b/arch/i386/math-emu/control_w.h
@@ -0,0 +1,45 @@
+/*---------------------------------------------------------------------------+
+ |  control_w.h                                                              |
+ | Masks and field values for the FPU control word.                          |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _CONTROLW_H_
+#define _CONTROLW_H_
+
+#ifdef __ASSEMBLER__
+#define	_Const_(x)	$##x	/* assembler: x with a $ pasted in front */
+#else
+#define	_Const_(x)	x	/* C: the bare constant */
+#endif
+
+#define CW_RC		_Const_(0x0C00)	/* rounding control */
+#define CW_PC		_Const_(0x0300)	/* precision control */
+
+#define CW_Precision	_Const_(0x0020)	/* loss of precision mask */
+#define CW_Underflow	_Const_(0x0010)	/* underflow mask */
+#define CW_Overflow	_Const_(0x0008)	/* overflow mask */
+#define CW_ZeroDiv	_Const_(0x0004)	/* divide by zero mask */
+#define CW_Denormal	_Const_(0x0002)	/* denormalized operand mask */
+#define CW_Invalid	_Const_(0x0001)	/* invalid operation mask */
+
+#define CW_Exceptions  	_Const_(0x003f)	/* all masks */
+
+#define RC_RND		_Const_(0x0000)
+#define RC_DOWN		_Const_(0x0400)
+#define RC_UP		_Const_(0x0800)
+#define RC_CHOP		_Const_(0x0C00)
+
+/* p 15-5: Precision control bits affect only the following:
+   ADD, SUB(R), MUL, DIV(R), and SQRT */
+#define PR_24_BITS        _Const_(0x000)
+#define PR_53_BITS        _Const_(0x200)
+#define PR_64_BITS        _Const_(0x300)
+#define PR_RESERVED_BITS  _Const_(0x100)
+/* FULL_PRECISION simulates all exceptions masked */
+#define FULL_PRECISION  (PR_64_BITS | RC_RND | 0x3f)
+
+#endif /* _CONTROLW_H_ */
diff --git a/arch/i386/math-emu/div_Xsig.S b/arch/i386/math-emu/div_Xsig.S
new file mode 100644
index 000000000..67d8be964
--- /dev/null
+++ b/arch/i386/math-emu/div_Xsig.S
@@ -0,0 +1,369 @@
+	.file	"div_Xsig.S"
+/*---------------------------------------------------------------------------+
+ |  div_Xsig.S                                                               |
+ |                                                                           |
+ | Division subroutine for 96 bit quantities                                 |
+ |                                                                           |
+ | Copyright (C) 1994                                                        |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and  |
+ | put the 96 bit result at the location d.                                  |
+ |                                                                           |
+ | The result may not be accurate to 96 bits. It is intended for use where   |
+ | a result better than 64 bits is required. The result should usually be    |
+ | good to at least 94 bits.                                                 |
+ | The returned result is actually multiplied by one half. This is done to   |
+ | prevent overflow.                                                         |
+ |                                                                           |
+ |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  ->  .dddddddddddd                      |
+ |                                                                           |
+ |  void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)                              |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "fpu_asm.h"
+
+
+#define	XsigLL(x)	(x)
+#define	XsigL(x)	4(x)
+#define	XsigH(x)	8(x)
+
+
+/*
+	Working storage for div_Xsig: a four-dword accumulator (the halved
+	dividend, reduced as the quotient is built) and three dwords of
+	result.  Either stack slots below %ebp or, when NON_REENTRANT_FPU
+	is defined, fixed locations in .data.
+ */
+#ifndef NON_REENTRANT_FPU
+/*
+	Local storage on the stack:
+	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
+ */
+#define FPU_accum_3	-4(%ebp)
+#define FPU_accum_2	-8(%ebp)
+#define FPU_accum_1	-12(%ebp)
+#define FPU_accum_0	-16(%ebp)
+#define FPU_result_3	-20(%ebp)
+#define FPU_result_2	-24(%ebp)
+#define FPU_result_1	-28(%ebp)
+
+#else
+.data
+/*
+	Local storage in a static area:
+	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
+ */
+	.align 2,0
+FPU_accum_3:
+	.long	0
+FPU_accum_2:
+	.long	0
+FPU_accum_1:
+	.long	0
+FPU_accum_0:
+	.long	0
+FPU_result_3:
+	.long	0
+FPU_result_2:
+	.long	0
+FPU_result_1:
+	.long	0
+#endif /* NON_REENTRANT_FPU */
+
+
+.text
+	.align 2,144
+
+.globl _div_Xsig
+
+/*
+	void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)
+
+	Arguments are read through PARAM1..PARAM3 (presumably %ebp-relative
+	macros from fpu_asm.h -- confirm there).  %esi, %edi and %ebx are
+	saved and restored; %eax, %ecx and %edx are used as scratch.
+	The dividend is halved before dividing, so the value stored at dest
+	is one half of a/b.
+ */
+_div_Xsig:
+	pushl	%ebp
+	movl	%esp,%ebp
+#ifndef NON_REENTRANT_FPU
+	subl	$28,%esp	/* room for FPU_accum_* and FPU_result_* */
+#endif /* NON_REENTRANT_FPU */
+
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi	/* pointer to num */
+	movl	PARAM2,%ebx	/* pointer to denom */
+
+#ifdef PARANOID
+	/* The divisor must have its most significant bit set */
+	testl	$0x80000000, XsigH(%ebx)	/* Divisor */
+	je	L_bugged
+#endif /* PARANOID */
+
+
+/*---------------------------------------------------------------------------+
+ |  Divide:   Return  arg1/arg2 to arg3.                                     |
+ |                                                                           |
+ |  The maximum returned value is (ignoring exponents)                       |
+ |               .ffffffff ffffffff                                          |
+ |               ------------------  =  1.ffffffff fffffffe                  |
+ |               .80000000 00000000                                          |
+ | and the minimum is                                                        |
+ |               .80000000 00000000                                          |
+ |               ------------------  =  .80000000 00000001   (rounded)       |
+ |               .ffffffff ffffffff                                          |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+	/* Save extended dividend in local register */
+
+	/* Divide by 2 to prevent overflow */
+	/* (a 1-bit right shift carried through all three dwords; the bit
+	   shifted out of the lowest dword lands in FPU_accum_0) */
+	clc
+	movl	XsigH(%esi),%eax
+	rcrl	%eax
+	movl	%eax,FPU_accum_3
+	movl	XsigL(%esi),%eax
+	rcrl	%eax
+	movl	%eax,FPU_accum_2
+	movl	XsigLL(%esi),%eax
+	rcrl	%eax
+	movl	%eax,FPU_accum_1
+	movl	$0,%eax
+	rcrl	%eax
+	movl	%eax,FPU_accum_0
+
+	movl	FPU_accum_2,%eax	/* Get the current num */
+	movl	FPU_accum_3,%edx
+
+/*----------------------------------------------------------------------*/
+/* Initialization done.
+   Do the first 32 bits. */
+
+	/* We will divide by a number which is too large */
+	movl	XsigH(%ebx),%ecx
+	addl	$1,%ecx
+	jnc	LFirst_div_not_1
+
+	/* here we need to divide by 100000000h,
+	   i.e., no division at all.. */
+	mov	%edx,%eax
+	jmp	LFirst_div_done
+
+LFirst_div_not_1:
+	divl	%ecx		/* Divide the numerator by the augmented
+				   denom ms dw */
+
+LFirst_div_done:
+	movl	%eax,FPU_result_3	/* Put the result in the answer */
+
+	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */
+
+	subl	%eax,FPU_accum_2	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_3
+
+	movl	FPU_result_3,%eax	/* Get the result back */
+	mull	XsigL(%ebx)	/* now mul the ls dw of the denom */
+
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+	sbbl	$0,FPU_accum_3
+	je	LDo_2nd_32_bits		/* Must check for non-zero result here */
+
+#ifdef PARANOID
+	jb	L_bugged_1
+#endif /* PARANOID */
+
+	/* need to subtract the denom once more */
+	incl	FPU_result_3	/* Correct the answer */
+
+	movl	XsigL(%ebx),%eax
+	movl	XsigH(%ebx),%edx
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+
+#ifdef PARANOID
+	sbbl	$0,FPU_accum_3
+	jne	L_bugged_1	/* Must check for non-zero result here */
+#endif /* PARANOID */
+
+/*----------------------------------------------------------------------*/
+/* Half of the main problem is done, there is just a reduced numerator
+   to handle now.
+   Work with the second 32 bits, FPU_accum_0 not used from now on */
+LDo_2nd_32_bits:
+	movl	FPU_accum_2,%edx	/* get the reduced num */
+	movl	FPU_accum_1,%eax
+
+	/* need to check for possible subsequent overflow */
+	cmpl	XsigH(%ebx),%edx
+	jb	LDo_2nd_div
+	ja	LPrevent_2nd_overflow
+
+	cmpl	XsigL(%ebx),%eax
+	jb	LDo_2nd_div
+
+LPrevent_2nd_overflow:
+/* The numerator is greater or equal, would cause overflow */
+	/* prevent overflow */
+	subl	XsigL(%ebx),%eax
+	sbbl	XsigH(%ebx),%edx
+	movl	%edx,FPU_accum_2
+	movl	%eax,FPU_accum_1
+
+	incl	FPU_result_3	/* Reflect the subtraction in the answer */
+
+#ifdef PARANOID
+	je	L_bugged_2	/* Can't bump the result to 1.0 */
+#endif /* PARANOID */
+
+LDo_2nd_div:
+	/* %ecx still holds the augmented denom msw from the first step */
+	cmpl	$0,%ecx		/* augmented denom msw */
+	jnz	LSecond_div_not_1
+
+	/* %ecx == 0, we are dividing by 1.0 */
+	mov	%edx,%eax
+	jmp	LSecond_div_done
+
+LSecond_div_not_1:
+	divl	%ecx		/* Divide the numerator by the denom ms dw */
+
+LSecond_div_done:
+	movl	%eax,FPU_result_2	/* Put the result in the answer */
+
+	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */
+
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+#endif /* PARANOID */
+
+	movl	FPU_result_2,%eax	/* Get the result back */
+	mull	XsigL(%ebx)	/* now mul the ls dw of the denom */
+
+	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	$0,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+#endif /* PARANOID */
+
+	jz	LDo_3rd_32_bits
+
+#ifdef PARANOID
+	cmpl	$1,FPU_accum_2
+	jne	L_bugged_2
+#endif /* PARANOID */
+
+	/* need to subtract the denom once more */
+	movl	XsigL(%ebx),%eax
+	movl	XsigH(%ebx),%edx
+	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_1
+	sbbl	$0,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+	jne	L_bugged_2
+#endif /* PARANOID */
+
+	addl	$1,FPU_result_2	/* Correct the answer */
+	adcl	$0,FPU_result_3
+
+#ifdef PARANOID
+	jc	L_bugged_2	/* A carry out of the answer is a logic error */
+#endif /* PARANOID */
+
+/*----------------------------------------------------------------------*/
+/* The division is essentially finished here, we just need to perform
+   tidying operations.
+   Deal with the 3rd 32 bits */
+LDo_3rd_32_bits:
+	/* We use an approximation for the third 32 bits.
+	To take account of the 3rd 32 bits of the divisor
+	(call them del), we subtract  del * (a/b) */
+
+	movl	FPU_result_3,%eax	/* a/b */
+	mull	XsigLL(%ebx)		/* del */
+
+	subl	%edx,FPU_accum_1
+
+	/* A borrow indicates that the result is negative */
+	jnb	LTest_over
+
+	movl	XsigH(%ebx),%edx
+	addl	%edx,FPU_accum_1
+
+	subl	$1,FPU_result_2		/* Adjust the answer */
+	sbbl	$0,FPU_result_3
+
+	/* The above addition might not have been enough, check again. */
+	movl	FPU_accum_1,%edx	/* get the reduced num */
+	cmpl	XsigH(%ebx),%edx	/* denom */
+	jb	LDo_3rd_div
+
+	movl	XsigH(%ebx),%edx
+	addl	%edx,FPU_accum_1
+
+	subl	$1,FPU_result_2		/* Adjust the answer */
+	sbbl	$0,FPU_result_3
+	jmp	LDo_3rd_div
+
+LTest_over:
+	movl	FPU_accum_1,%edx	/* get the reduced num */
+
+	/* need to check for possible subsequent overflow */
+	cmpl	XsigH(%ebx),%edx	/* denom */
+	jb	LDo_3rd_div
+
+	/* prevent overflow */
+	subl	XsigH(%ebx),%edx
+	movl	%edx,FPU_accum_1
+
+	addl	$1,FPU_result_2	/* Reflect the subtraction in the answer */
+	adcl	$0,FPU_result_3
+
+LDo_3rd_div:
+	movl	FPU_accum_0,%eax
+	movl	FPU_accum_1,%edx
+	divl	XsigH(%ebx)
+
+	movl    %eax,FPU_result_1       /* Rough estimate of third word */
+
+	movl	PARAM3,%esi		/* pointer to answer */
+
+	/* Copy the three result dwords out to *dest */
+	movl	FPU_result_1,%eax
+	movl	%eax,XsigLL(%esi)
+	movl	FPU_result_2,%eax
+	movl	%eax,XsigL(%esi)
+	movl	FPU_result_3,%eax
+	movl	%eax,XsigH(%esi)
+
+L_exit:
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+
+	leave
+	ret
+
+
+#ifdef PARANOID
+/* The logic is wrong if we got here */
+/* Each handler pushes EX_INTERNAL or'ed with its own code (0x240..0x242),
+   calls EXCEPTION, pops the argument off the stack again and leaves
+   through L_exit without storing a result. */
+L_bugged:
+	pushl	EX_INTERNAL|0x240
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+
+L_bugged_1:
+	pushl	EX_INTERNAL|0x241
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+
+L_bugged_2:
+	pushl	EX_INTERNAL|0x242
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+#endif /* PARANOID */
diff --git a/arch/i386/math-emu/div_small.S b/arch/i386/math-emu/div_small.S
new file mode 100644
index 000000000..0225a96d4
--- /dev/null
+++ b/arch/i386/math-emu/div_small.S
@@ -0,0 +1,50 @@
+	.file	"div_small.S"
+/*---------------------------------------------------------------------------+
+ |  div_small.S                                                              |
+ |                                                                           |
+ | Divide a 64 bit integer by a 32 bit integer & return remainder.           |
+ |                                                                           |
+ | Copyright (C) 1992    W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ |    unsigned long div_small(unsigned long long *x, unsigned long y)        |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_asm.h"
+
+.text
+	.align 2,144
+
+.globl _div_small
+
+/*
+	unsigned long div_small(unsigned long long *x, unsigned long y)
+
+	Divides the 64 bit value at *x by y using two 32 bit divisions.
+	The quotient is written back to *x and the remainder is returned
+	in %eax.  There is no check for y == 0.
+ */
+_div_small:
+	pushl	%ebp
+	movl	%esp,%ebp
+
+	pushl	%esi
+
+	movl	PARAM1,%esi	/* pointer to num */
+	movl	PARAM2,%ecx	/* The denominator */
+
+	/* First step: 0:msw / y.  The quotient is the new msw. */
+	movl	4(%esi),%eax	/* Get the current num msw */
+	xorl	%edx,%edx
+	divl	%ecx
+
+	movl	%eax,4(%esi)
+
+	/* Second step: the remainder left in %edx is the high half of the
+	   dividend, the lsw the low half.  The remainder is less than y,
+	   so this quotient fits in 32 bits. */
+	movl	(%esi),%eax	/* Get the num lsw */
+	divl	%ecx
+
+	movl	%eax,(%esi)
+
+	movl	%edx,%eax	/* Return the remainder in eax */
+
+	popl	%esi
+
+	leave
+	ret
+
diff --git a/arch/i386/math-emu/errors.c b/arch/i386/math-emu/errors.c
new file mode 100644
index 000000000..e34eec942
--- /dev/null
+++ b/arch/i386/math-emu/errors.c
@@ -0,0 +1,671 @@
+/*---------------------------------------------------------------------------+
+ |  errors.c                                                                 |
+ |                                                                           |
+ |  The error handling functions for wm-FPU-emu                              |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Note:                                                                     |
+ |    The file contains code which accesses user memory.                     |
+ |    Emulator static data may change when user memory is accessed, due to   |
+ |    other processes using the emulator while swapping is in progress.      |
+ +---------------------------------------------------------------------------*/
+
+#include <linux/signal.h>
+
+#include <asm/segment.h>
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "control_w.h"
+#include "reg_constant.h"
+#include "version.h"
+
+/* */
+#undef PRINT_MESSAGES
+/* */
+
+
+/*
+   Report an unimplemented FPU opcode via printk() and then raise
+   EX_Invalid.  For user-mode code the instruction bytes are dumped,
+   otherwise only the code segment selector is shown.
+   */
+void Un_impl(void)
+{
+  unsigned long insn_addr = FPU_ORIG_EIP;
+  unsigned char opcode_byte, modrm_byte;
+
+  RE_ENTRANT_CHECK_OFF;
+  /* These bytes were fetched before, so verify_area() is not repeated. */
+  printk("Unimplemented FPU Opcode at eip=%p : ", (void *) insn_addr);
+  if ( FPU_CS != USER_CS )
+    {
+      printk("cs selector = %04x\n", FPU_CS);
+    }
+  else
+    {
+      /* Bytes ahead of the first 0xd8..0xdf byte are shown in brackets. */
+      for ( opcode_byte = get_fs_byte((unsigned char *) insn_addr);
+	    (opcode_byte & 0xf8) != 0xd8;
+	    opcode_byte = get_fs_byte((unsigned char *) insn_addr) )
+	{
+	  printk("[%02x]", opcode_byte);
+	  insn_addr++;
+	}
+      printk("%02x ", opcode_byte);
+
+      /* The byte after the opcode byte is decoded as the ModR/M byte. */
+      modrm_byte = get_fs_byte(1 + (unsigned char *) insn_addr);
+      if ( modrm_byte < 0300 )
+	printk("/%d\n", (modrm_byte >> 3) & 7);
+      else
+	printk("%02x (%02x+%d)\n", modrm_byte, modrm_byte & 0xf8,
+	       modrm_byte & 7);
+    }
+  RE_ENTRANT_CHECK_ON;
+
+  EXCEPTION(EX_Invalid);
+}
+
+
+/*
+   Called for opcodes which are illegal and which are known to result in a
+   SIGILL with a real 80486.
+   Hands FPU_info and SIGILL to math_abort(); nothing is done here after
+   that call.
+   */
+void FPU_illegal(void)
+{
+  math_abort(FPU_info,SIGILL);
+}
+
+
+
+/*
+   Dump the emulator state with printk(): the bytes of the current
+   instruction (user-mode code only), the status word, the control word
+   and every non-empty stack register.
+   */
+void emu_printall(void)
+{
+  int i;
+  static char *tag_desc[] = { "Valid", "Zero", "ERROR", "ERROR",
+                              "DeNorm", "Inf", "NaN", "Empty" };
+  unsigned char byte1, FPU_modrm;
+  unsigned long address = FPU_ORIG_EIP;
+
+  RE_ENTRANT_CHECK_OFF;
+  /* No need to verify_area(), we have previously fetched these bytes. */
+  printk("At %p:", (void *) address);
+  if ( FPU_CS == USER_CS )
+    {
+#define MAX_PRINTED_BYTES 20
+      /* Show at most MAX_PRINTED_BYTES bytes ahead of the 0xd8..0xdf byte */
+      for ( i = 0; i < MAX_PRINTED_BYTES; i++ )
+	{
+	  byte1 = get_fs_byte((unsigned char *) address);
+	  if ( (byte1 & 0xf8) == 0xd8 )
+	    {
+	      printk(" %02x", byte1);
+	      break;
+	    }
+	  printk(" [%02x]", byte1);
+	  address++;
+	}
+      if ( i == MAX_PRINTED_BYTES )
+	printk(" [more..]\n");
+      else
+	{
+	  FPU_modrm = get_fs_byte(1 + (unsigned char *) address);
+
+	  if (FPU_modrm >= 0300)
+	    printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7);
+	  else
+	    printk(" /%d, mod=%d rm=%d\n",
+		   (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7);
+	}
+    }
+  else
+    {
+      printk("%04x\n", FPU_CS);
+    }
+
+  partial_status = status_word();
+
+#ifdef DEBUGGING
+if ( partial_status & SW_Backward )    printk("SW: backward compatibility\n");
+if ( partial_status & SW_C3 )          printk("SW: condition bit 3\n");
+if ( partial_status & SW_C2 )          printk("SW: condition bit 2\n");
+if ( partial_status & SW_C1 )          printk("SW: condition bit 1\n");
+if ( partial_status & SW_C0 )          printk("SW: condition bit 0\n");
+if ( partial_status & SW_Summary )     printk("SW: exception summary\n");
+if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n");
+if ( partial_status & SW_Precision )   printk("SW: loss of precision\n");
+if ( partial_status & SW_Underflow )   printk("SW: underflow\n");
+if ( partial_status & SW_Overflow )    printk("SW: overflow\n");
+if ( partial_status & SW_Zero_Div )    printk("SW: divide by zero\n");
+if ( partial_status & SW_Denorm_Op )   printk("SW: denormalized operand\n");
+if ( partial_status & SW_Invalid )     printk("SW: invalid operation\n");
+#endif /* DEBUGGING */
+
+  printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n",
+	 partial_status & 0x8000 ? 1 : 0,   /* busy */
+	 (partial_status & 0x3800) >> 11,   /* stack top pointer */
+	 partial_status & 0x80 ? 1 : 0,     /* Error summary status */
+	 partial_status & 0x40 ? 1 : 0,     /* Stack flag */
+	 partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */
+	 partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */
+	 partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0,
+	 partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0,
+	 partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0);
+
+printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d     ef=%d%d%d%d%d%d\n",
+	 control_word & 0x1000 ? 1 : 0,
+	 (control_word & 0x800) >> 11, (control_word & 0x400) >> 10,
+	 (control_word & 0x200) >> 9, (control_word & 0x100) >> 8,
+	 control_word & 0x80 ? 1 : 0,
+	 control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0,
+	 control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0,
+	 control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0);
+
+  for ( i = 0; i < 8; i++ )
+    {
+      FPU_REG *r = &st(i);
+      unsigned tag_index = (unsigned) r->tag;
+
+      switch (r->tag)
+	{
+	case TW_Empty:
+	  continue;
+	  break;
+	case TW_Zero:
+	  /* With the block below disabled, zero is printed like the
+	     other tags that follow. */
+#if 0
+	  printk("st(%d)  %c .0000 0000 0000 0000         ",
+		 i, r->sign ? '-' : '+');
+	  break;
+#endif
+	case TW_Valid:
+	case TW_NaN:
+/*	case TW_Denormal: */
+	case TW_Infinity:
+	  printk("st(%d)  %c .%04lx %04lx %04lx %04lx e%+-6ld ", i,
+		 r->sign ? '-' : '+',
+		 (long)(r->sigh >> 16),
+		 (long)(r->sigh & 0xFFFF),
+		 (long)(r->sigl >> 16),
+		 (long)(r->sigl & 0xFFFF),
+		 r->exp - EXP_BIAS + 1);
+	  break;
+	default:
+	  printk("Whoops! Error in errors.c      ");
+	  break;
+	}
+      /* This routine runs when the state may already be corrupt, so a
+	 tag outside the table must not be used as an index. */
+      printk("%s\n",
+	     tag_index < sizeof(tag_desc) / sizeof(tag_desc[0])
+	     ? tag_desc[tag_index] : "ERROR");
+    }
+
+#ifdef OBSOLETE
+  printk("[data] %c .%04lx %04lx %04lx %04lx e%+-6ld ",
+	 FPU_loaded_data.sign ? '-' : '+',
+	 (long)(FPU_loaded_data.sigh >> 16),
+	 (long)(FPU_loaded_data.sigh & 0xFFFF),
+	 (long)(FPU_loaded_data.sigl >> 16),
+	 (long)(FPU_loaded_data.sigl & 0xFFFF),
+	 FPU_loaded_data.exp - EXP_BIAS + 1);
+  printk("%s\n", tag_desc[(int) (unsigned) FPU_loaded_data.tag]);
+#endif /* OBSOLETE */
+  RE_ENTRANT_CHECK_ON;
+
+}
+
+/*
+   Names for the exception types, searched in order by exception().
+   An entry matches when all of its type bits are set in the exception
+   number, and the first match wins.  The { 0, NULL } entry ends the table.
+   */
+static struct {
+  int type;
+  char *name;
+} exception_names[] = {
+  { EX_StackOver, "stack overflow" },
+  { EX_StackUnder, "stack underflow" },
+  { EX_Precision, "loss of precision" },
+  { EX_Underflow, "underflow" },
+  { EX_Overflow, "overflow" },
+  { EX_ZeroDiv, "divide by zero" },
+  { EX_Denormal, "denormalized operand" },
+  { EX_Invalid, "invalid operation" },
+  { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION },
+  { 0, NULL }
+};
+
+/*
+ EX_INTERNAL is always given with a code which indicates where the
+ error was detected.
+
+ Internal error types:
+       0x14   in fpu_etc.c
+       0x1nn  in a *.c file:
+              0x101  in reg_add_sub.c
+              0x102  in reg_mul.c
+              0x104  in poly_atan.c
+              0x105  in reg_mul.c
+              0x107  in fpu_trig.c
+	      0x108  in reg_compare.c
+	      0x109  in reg_compare.c
+	      0x110  in reg_add_sub.c
+	      0x111  in fpu_entry.c
+	      0x112  in fpu_trig.c
+	      0x113  in errors.c
+	      0x115  in fpu_trig.c
+	      0x116  in fpu_trig.c
+	      0x117  in fpu_trig.c
+	      0x118  in fpu_trig.c
+	      0x119  in fpu_trig.c
+	      0x120  in poly_atan.c
+	      0x121  in reg_compare.c
+	      0x122  in reg_compare.c
+	      0x123  in reg_compare.c
+	      0x125  in fpu_trig.c
+	      0x126  in fpu_entry.c
+	      0x127  in poly_2xm1.c
+	      0x128  in fpu_entry.c
+	      0x129  in fpu_entry.c
+	      0x130  in get_address.c
+	      0x131  in get_address.c
+	      0x132  in get_address.c
+	      0x133  in get_address.c
+	      0x140  in load_store.c
+	      0x141  in load_store.c
+              0x150  in poly_sin.c
+              0x151  in poly_sin.c
+	      0x160  in reg_ld_str.c
+	      0x161  in reg_ld_str.c
+	      0x162  in reg_ld_str.c
+	      0x163  in reg_ld_str.c
+       0x2nn  in an *.S file:
+              0x201  in reg_u_add.S
+              0x202  in reg_u_div.S
+              0x203  in reg_u_div.S
+              0x204  in reg_u_div.S
+              0x205  in reg_u_mul.S
+              0x206  in reg_u_sub.S
+              0x207  in wm_sqrt.S
+	      0x208  in reg_div.S
+              0x209  in reg_u_sub.S
+              0x210  in reg_u_sub.S
+              0x211  in reg_u_sub.S
+              0x212  in reg_u_sub.S
+	      0x213  in wm_sqrt.S
+	      0x214  in wm_sqrt.S
+	      0x215  in wm_sqrt.S
+	      0x220  in reg_norm.S
+	      0x221  in reg_norm.S
+	      0x230  in reg_round.S
+	      0x231  in reg_round.S
+	      0x232  in reg_round.S
+	      0x233  in reg_round.S
+	      0x234  in reg_round.S
+	      0x235  in reg_round.S
+	      0x236  in reg_round.S
+	      0x240  in div_Xsig.S
+	      0x241  in div_Xsig.S
+	      0x242  in div_Xsig.S
+ */
+
+/*
+   Record exception n in partial_status and report it where needed.
+
+   n is either EX_INTERNAL or'ed with a code saying where the error was
+   detected, or a set of exception flag bits (optionally with SW_C1).
+   Internal errors set all exception bits and are always reported with
+   printk() and emu_printall().  Other exceptions set the summary bits
+   only when they are not masked in control_word.
+   */
+void exception(int n)
+{
+  int i, int_type;
+
+  int_type = 0;         /* Needed only to stop compiler warnings */
+  if ( n & EX_INTERNAL )
+    {
+      /* Split off the location code, keep only the EX_INTERNAL marker */
+      int_type = n - EX_INTERNAL;
+      n = EX_INTERNAL;
+      /* Set lots of exception bits! */
+      partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward);
+    }
+  else
+    {
+      /* Extract only the bits which we use to set the status word */
+      n &= (SW_Exc_Mask);
+      /* Set the corresponding exception bit */
+      partial_status |= n;
+      /* Set summary bits iff exception isn't masked */
+      if ( partial_status & ~control_word & CW_Exceptions )
+	partial_status |= (SW_Summary | SW_Backward);
+      if ( n & (SW_Stack_Fault | EX_Precision) )
+	{
+	  if ( !(n & SW_C1) )
+	    /* This bit distinguishes over- from underflow for a stack fault,
+	       and roundup from round-down for precision loss. */
+	    partial_status &= ~SW_C1;
+	}
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) )
+    {
+#ifdef PRINT_MESSAGES
+      /* My message from the sponsor */
+      printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n");
+#endif /* PRINT_MESSAGES */
+
+      /* Get a name string for error reporting */
+      for (i=0; exception_names[i].type; i++)
+	if ( (exception_names[i].type & n) == exception_names[i].type )
+	  break;
+
+      if (exception_names[i].type)
+	{
+#ifdef PRINT_MESSAGES
+	  printk("FP Exception: %s!\n", exception_names[i].name);
+#endif /* PRINT_MESSAGES */
+	}
+      else
+	printk("FPU emulator: Unknown Exception: 0x%04x!\n", n);
+
+      if ( n == EX_INTERNAL )
+	{
+	  printk("FPU emulator: Internal error type 0x%04x\n", int_type);
+	  emu_printall();
+	}
+#ifdef PRINT_MESSAGES
+      else
+	emu_printall();
+#endif /* PRINT_MESSAGES */
+
+      /*
+       * The 80486 generates an interrupt on the next non-control FPU
+       * instruction. So we need some means of flagging it.
+       * We use the ES (Error Summary) bit for this, assuming that
+       * this is the way a real FPU does it (until I can check it out),
+       * if not, then some method such as the following kludge might
+       * be needed.
+       */
+/*      regs[0].tag |= TW_FPU_Interrupt; */
+    }
+  RE_ENTRANT_CHECK_ON;
+
+#ifdef __DEBUG__
+  math_abort(FPU_info,SIGFPE);
+#endif /* __DEBUG__ */
+
+}
+
+
+/* Real operation attempted on two operands, one a NaN. */
+/* Returns nz if the exception is unmasked */
+/* The NaN to propagate is picked from a and b and copied to dest.  A
+   signalling NaN (bit 0x40000000 of sigh clear) also raises EX_Invalid;
+   dest is then written only if that exception is masked. */
+asmlinkage int real_2op_NaN(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest)
+{
+  FPU_REG const *x;
+  int signalling;
+
+  /* The default result for the case of two "equal" NaNs (signs may
+     differ) is chosen to reproduce 80486 behaviour */
+  x = a;
+  if (a->tag == TW_NaN)
+    {
+      if (b->tag == TW_NaN)
+	{
+	  /* Signalling if either of the two is a signalling NaN */
+	  signalling = !(a->sigh & b->sigh & 0x40000000);
+	  /* find the "larger" */
+	  if ( significand(a) < significand(b) )
+	    x = b;
+	}
+      else
+	{
+	  /* return the quiet version of the NaN in a */
+	  signalling = !(a->sigh & 0x40000000);
+	}
+    }
+  else
+    /* Without PARANOID, b is simply taken to be the NaN here */
+#ifdef PARANOID
+    if (b->tag == TW_NaN)
+#endif /* PARANOID */
+    {
+      signalling = !(b->sigh & 0x40000000);
+      x = b;
+    }
+#ifdef PARANOID
+  else
+    {
+      /* Neither operand is a NaN: this function should not be here */
+      signalling = 0;
+      EXCEPTION(EX_INTERNAL|0x113);
+      x = &CONST_QNaN;
+    }
+#endif /* PARANOID */
+
+  if ( !signalling )
+    {
+      if ( !(x->sigh & 0x80000000) )  /* pseudo-NaN ? */
+	x = &CONST_QNaN;
+      reg_move(x, dest);
+      return 0;
+    }
+
+  if ( control_word & CW_Invalid )
+    {
+      /* The masked response */
+      if ( !(x->sigh & 0x80000000) )  /* pseudo-NaN ? */
+	x = &CONST_QNaN;
+      reg_move(x, dest);
+      /* ensure a Quiet NaN */
+      dest->sigh |= 0x40000000;
+    }
+
+  EXCEPTION(EX_Invalid);
+
+  return !(control_word & CW_Invalid);
+}
+
+
+/* Invalid arithmetic operation on Valid registers: raises EX_Invalid and,
+   when that exception is masked, stores the quiet NaN constant in dest. */
+/* Returns nz if the exception is unmasked */
+asmlinkage int arith_invalid(FPU_REG *dest)
+{
+  EXCEPTION(EX_Invalid);
+
+  if ( !(control_word & CW_Invalid) )
+    return 1;			/* unmasked: dest is left alone */
+
+  /* The masked response */
+  reg_move(&CONST_QNaN, dest);
+  return 0;
+}
+
+
+/* Divide a finite number by zero: raises EX_ZeroDiv.  When that exception
+   is masked, dest is first set to infinity with the given sign.
+   Returns nz if the exception is unmasked. */
+asmlinkage int divide_by_zero(int sign, FPU_REG *dest)
+{
+  if ( (control_word & CW_ZeroDiv) != 0 )
+    {
+      /* The masked response */
+      reg_move(&CONST_INF, dest);
+      dest->sign = (unsigned char)sign;
+    }
+
+  EXCEPTION(EX_ZeroDiv);
+
+  return (control_word & CW_ZeroDiv) ? 0 : 1;
+}
+
+
+/* This may be called often, so keep it lean */
+/* Record a loss of precision.  flags holds the precision exception bit,
+   with SW_C1 added when the result was rounded up.
+   Returns 0 if the precision exception is masked, 1 otherwise. */
+int set_precision_flag(int flags)
+{
+  if ( control_word & CW_Precision )
+    {
+      /* The masked response.  C1 must reflect this rounding only, so
+	 clear it before or-ing in the flags; set_precision_flag_down()
+	 and the unmasked path through exception() do the same. */
+      partial_status &= ~SW_C1;
+      partial_status |= flags;
+      return 0;
+    }
+  else
+    {
+      exception(flags);
+      return 1;
+    }
+}
+
+
+/* This may be called often, so keep it lean */
+/* Precision was lost and the result was rounded up (C1 set). */
+asmlinkage void set_precision_flag_up(void)
+{
+  if ( !(control_word & CW_Precision) )
+    {
+      exception(EX_Precision | SW_C1);
+      return;
+    }
+
+  /* The masked response */
+  partial_status |= (SW_Precision | SW_C1);
+}
+
+
+/* This may be called often, so keep it lean */
+/* Precision was lost and the result was not rounded up (C1 clear). */
+asmlinkage void set_precision_flag_down(void)
+{
+  if ( !(control_word & CW_Precision) )
+    {
+      exception(EX_Precision);
+      return;
+    }
+
+  /* The masked response */
+  partial_status &= ~SW_C1;
+  partial_status |= SW_Precision;
+}
+
+
+/* Flag a denormalized operand.
+   Returns 0 if the denormal exception is masked, 1 otherwise. */
+asmlinkage int denormal_operand(void)
+{
+  if ( !(control_word & CW_Denormal) )
+    {
+      exception(EX_Denormal);
+      return 1;
+    }
+
+  /* The masked response */
+  partial_status |= SW_Denorm_Op;
+  return 0;
+}
+
+
+/* Arithmetic overflow of the result in dest.
+   Masked: dest becomes infinity with its sign kept, then EX_Overflow and
+   EX_Precision (with C1) are raised; returns nz if the precision exception
+   is unmasked.
+   Unmasked: 3 * (1 << 13) = 24576 is subtracted from the exponent and
+   EX_Overflow is raised; the return value is then !(control_word &
+   CW_Overflow). */
+asmlinkage int arith_overflow(FPU_REG *dest)
+{
+
+  if ( control_word & CW_Overflow )
+    {
+      char sign;
+      /* The masked response */
+/* ###### The response here depends upon the rounding mode */
+      sign = dest->sign;
+      reg_move(&CONST_INF, dest);
+      dest->sign = sign;
+    }
+  else
+    {
+      /* Subtract the magic number from the exponent */
+      dest->exp -= (3 * (1 << 13));
+    }
+
+  EXCEPTION(EX_Overflow);
+  if ( control_word & CW_Overflow )
+    {
+      /* The overflow exception is masked. */
+      /* By definition, precision is lost.
+	 The roundup bit (C1) is also set because we have
+	 "rounded" upwards to Infinity. */
+      EXCEPTION(EX_Precision | SW_C1);
+      return !(control_word & CW_Precision);
+    }
+
+  return !(control_word & CW_Overflow);
+
+}
+
+
+/* Arithmetic underflow of the result in dest.
+   Masked: dest is replaced by zero (and C1 cleared) only when its exponent
+   is at or below EXP_UNDER - 63; then EX_Underflow and EX_Precision are
+   raised and nz is returned if the precision exception is unmasked.
+   Unmasked: 3 * (1 << 13) = 24576 is added to the exponent and
+   EX_Underflow is raised; the return value is then !(control_word &
+   CW_Underflow). */
+asmlinkage int arith_underflow(FPU_REG *dest)
+{
+
+  if ( control_word & CW_Underflow )
+    {
+      /* The masked response */
+      if ( dest->exp <= EXP_UNDER - 63 )
+	{
+	  reg_move(&CONST_Z, dest);
+	  partial_status &= ~SW_C1;       /* Round down. */
+	}
+    }
+  else
+    {
+      /* Add the magic number to the exponent. */
+      dest->exp += (3 * (1 << 13));
+    }
+
+  EXCEPTION(EX_Underflow);
+  if ( control_word & CW_Underflow )
+    {
+      /* The underflow exception is masked. */
+      EXCEPTION(EX_Precision);
+      return !(control_word & CW_Precision);
+    }
+
+  return !(control_word & CW_Underflow);
+
+}
+
+
+/* Stack overflow: raises EX_StackOver.  When the invalid-operation
+   exception is masked, top is decremented and the new st(0) is set to
+   the quiet NaN constant first. */
+void stack_overflow(void)
+{
+  if ( (control_word & CW_Invalid) != 0 )
+    {
+      /* The masked response */
+      top--;
+      reg_move(&CONST_QNaN, &st(0));
+    }
+
+  EXCEPTION(EX_StackOver);
+}
+
+
+/* Stack underflow: raises EX_StackUnder.  When the invalid-operation
+   exception is masked, st(0) is set to the quiet NaN constant first. */
+void stack_underflow(void)
+{
+  if ( (control_word & CW_Invalid) != 0 )
+    {
+      /* The masked response */
+      reg_move(&CONST_QNaN, &st(0));
+    }
+
+  EXCEPTION(EX_StackUnder);
+}
+
+
+/* Stack underflow with st(i) as the destination: raises EX_StackUnder.
+   When the invalid-operation exception is masked, st(i) is set to the
+   quiet NaN constant first. */
+void stack_underflow_i(int i)
+{
+  if ( (control_word & CW_Invalid) != 0 )
+    {
+      /* The masked response */
+      reg_move(&CONST_QNaN, &(st(i)));
+    }
+
+  EXCEPTION(EX_StackUnder);
+}
+
+
+/* Stack underflow for an instruction which pops: raises EX_StackUnder.
+   When the invalid-operation exception is masked, st(i) is set to the
+   quiet NaN constant and pop() is called first. */
+void stack_underflow_pop(int i)
+{
+  if ( (control_word & CW_Invalid) != 0 )
+    {
+      /* The masked response */
+      reg_move(&CONST_QNaN, &(st(i)));
+      pop();
+    }
+
+  EXCEPTION(EX_StackUnder);
+}
+
diff --git a/arch/i386/math-emu/exception.h b/arch/i386/math-emu/exception.h
new file mode 100644
index 000000000..2e629a30c
--- /dev/null
+++ b/arch/i386/math-emu/exception.h
@@ -0,0 +1,53 @@
+/*---------------------------------------------------------------------------+
+ |  exception.h                                                              |
+ |                                                                           |
+ | Copyright (C) 1992    W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _EXCEPTION_H_
+#define _EXCEPTION_H_
+
+
+/* Const_(x) expands to an assembler immediate ($x) when this header is
+   included from a .S file and to the bare constant x when included from C. */
+#ifdef __ASSEMBLER__
+#define	Const_(x)	$##x
+#else
+#define	Const_(x)	x
+#endif
+
+/* SW_C1 is used below; pull in fpu_emu.h if nothing has defined it yet */
+#ifndef SW_C1
+#include "fpu_emu.h"
+#endif /* SW_C1 */
+
+#define FPU_BUSY        Const_(0x8000)   /* FPU busy bit (8087 compatibility) */
+#define EX_ErrorSummary Const_(0x0080)   /* Error summary status */
+/* Special exceptions: */
+#define	EX_INTERNAL	Const_(0x8000)	/* Internal error in wm-FPU-emu */
+#define EX_StackOver	Const_(0x0041|SW_C1)	/* stack overflow */
+#define EX_StackUnder	Const_(0x0041)	/* stack underflow */
+/* Exception flags: */
+#define EX_Precision	Const_(0x0020)	/* loss of precision */
+#define EX_Underflow	Const_(0x0010)	/* underflow */
+#define EX_Overflow	Const_(0x0008)	/* overflow */
+#define EX_ZeroDiv	Const_(0x0004)	/* divide by zero */
+#define EX_Denormal	Const_(0x0002)	/* denormalized operand */
+#define EX_Invalid	Const_(0x0001)	/* invalid operation */
+
+
+#define PRECISION_LOST_UP    Const_((EX_Precision | SW_C1))
+#define PRECISION_LOST_DOWN  Const_(EX_Precision)
+
+
+#ifndef __ASSEMBLER__
+
+/* EXCEPTION(x) is used as a statement.  The DEBUG variant is wrapped in
+   do { } while (0) so that "if (c) EXCEPTION(x); else ..." still parses. */
+#ifdef DEBUG
+#define	EXCEPTION(x)	do { printk("exception in %s at line %d\n", \
+	__FILE__, __LINE__); exception(x); } while (0)
+#else
+#define	EXCEPTION(x)	exception(x)
+#endif
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _EXCEPTION_H_ */
diff --git a/arch/i386/math-emu/fpu_arith.c b/arch/i386/math-emu/fpu_arith.c
new file mode 100644
index 000000000..96e6bd89b
--- /dev/null
+++ b/arch/i386/math-emu/fpu_arith.c
@@ -0,0 +1,179 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_arith.c                                                              |
+ |                                                                           |
+ | Code to implement the FPU register/register arithmetic instructions       |
+ |                                                                           |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+#include "status_w.h"
+
+
+void fadd__(void)
+{
+  /* fadd st,st(i): operands st(0) and st(i), result written to st(0) */
+  clear_C1();
+  reg_add(&st(0), &st(FPU_rm), &st(0), control_word);
+}
+
+
+void fmul__(void)
+{
+  /* fmul st,st(i): operands st(0) and st(i), result written to st(0) */
+  clear_C1();
+  reg_mul(&st(0), &st(FPU_rm), &st(0), control_word);
+}
+
+
+
+void fsub__(void)
+{
+  /* fsub st,st(i): first operand st(0), second st(i), result to st(0) */
+  clear_C1();
+  reg_sub(&st(0), &st(FPU_rm), &st(0), control_word);
+}
+
+
+void fsubr_(void)
+{
+  /* fsubr st,st(i): first operand st(i), second st(0), result to st(0) */
+  clear_C1();
+  reg_sub(&st(FPU_rm), &st(0), &st(0), control_word);
+}
+
+
+void fdiv__(void)
+{
+  /* fdiv st,st(i): arg1 = st(0), arg2 = st(i), answer written to st(0) */
+  clear_C1();
+  reg_div(&st(0), &st(FPU_rm), &st(0), control_word);
+}
+
+
+void fdivr_(void)
+{
+  /* fdivr st,st(i): arg1 = st(i), arg2 = st(0), answer written to st(0) */
+  clear_C1();
+  reg_div(&st(FPU_rm), &st(0), &st(0), control_word);
+}
+
+
+
+void fadd_i(void)
+{
+  /* fadd st(i),st: operands st(0) and st(i), result written to st(i) */
+  clear_C1();
+  reg_add(&st(0), &st(FPU_rm), &st(FPU_rm), control_word);
+}
+
+
+void fmul_i(void)
+{
+  /* fmul st(i),st: operands st(0) and st(i), result written to st(i) */
+  clear_C1();
+  reg_mul(&st(0), &st(FPU_rm), &st(FPU_rm), control_word);
+}
+
+
+void fsubri(void)
+{
+  /* fsubr st(i),st: first operand st(0), second st(i), result to st(i) */
+  /* The 80486 manual implies the opposite operand order, i.e.
+     reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word); */
+  clear_C1();
+  reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word);
+}
+
+
+void fsub_i(void)
+{
+  /* fsub st(i),st: first operand st(i), second st(0), result to st(i) */
+  /* The 80486 manual implies the opposite operand order, i.e.
+     reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); */
+  clear_C1();
+  reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word);
+}
+
+
+void fdivri(void)
+{
+  /* fdivr st(i),st: arg1 = st(0), arg2 = st(i), answer written to st(i) */
+  clear_C1();
+  reg_div(&st(0), &st(FPU_rm), &st(FPU_rm), control_word);
+}
+
+
+void fdiv_i(void)
+{
+  /* fdiv st(i),st: arg1 = st(i), arg2 = st(0), answer written to st(i) */
+  clear_C1();
+  reg_div(&st(FPU_rm), &st(0), &st(FPU_rm), control_word);
+}
+
+
+
+void faddp_(void)
+{
+  /* faddp st(i),st: as fadd_i; the pop is skipped if reg_add() returns non-zero */
+  clear_C1();
+  if ( !reg_add(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) )
+    pop();
+}
+
+
+void fmulp_(void)
+{
+  /* fmulp st(i),st: as fmul_i; the pop is skipped if reg_mul() returns non-zero */
+  clear_C1();
+  if ( !reg_mul(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) )
+    pop();
+}
+
+
+
+void fsubrp(void)
+{
+  /* fsubrp st(i),st: as fsubri; the pop is skipped if reg_sub() returns non-zero */
+  /* The 80486 manual implies the opposite operand order, i.e.
+     reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word); */
+  clear_C1();
+  if ( !reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) )
+    pop();
+}
+
+
+void fsubp_(void)
+{
+  /* fsubp st(i),st: as fsub_i; the pop is skipped if reg_sub() returns non-zero */
+  /* The 80486 manual implies the opposite operand order, i.e.
+     reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); */
+  clear_C1();
+  if ( !reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word) )
+    pop();
+}
+
+
+void fdivrp(void)
+{
+  /* fdivrp st(i),st: as fdivri; the pop is skipped if reg_div() returns non-zero */
+  clear_C1();
+  if ( !reg_div(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) )
+    pop();
+}
+
+
+void fdivp_(void)
+{
+  /* fdivp st(i),st: as fdiv_i; the pop is skipped if reg_div() returns non-zero */
+  clear_C1();
+  if ( !reg_div(&st(FPU_rm), &st(0), &st(FPU_rm), control_word) )
+    pop();
+}
+
diff --git a/arch/i386/math-emu/fpu_asm.h b/arch/i386/math-emu/fpu_asm.h
new file mode 100644
index 000000000..8eb60148d
--- /dev/null
+++ b/arch/i386/math-emu/fpu_asm.h
@@ -0,0 +1,30 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_asm.h                                                                |
+ |                                                                           |
+ | Copyright (C) 1992    W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _FPU_ASM_H_
+#define _FPU_ASM_H_
+
+#include "fpu_emu.h"
+
+#define	EXCEPTION	_exception
+
+/* %ebp-relative operands; presumably the first four stack arguments. */
+#define PARAM1	8(%ebp)
+#define	PARAM2	12(%ebp)
+#define	PARAM3	16(%ebp)
+#define	PARAM4	20(%ebp)
+/* Displacement(base) operands; presumably the FPU_REG field offsets. */
+#define SIGL_OFFSET 8
+#define SIGN(x)	(x)
+#define	TAG(x)	1(x)
+#define	EXP(x)	4(x)
+#define SIG(x)	SIGL_OFFSET(x)
+#define	SIGL(x)	SIGL_OFFSET(x)
+#define	SIGH(x)	12(x)
+
+#endif /* _FPU_ASM_H_ */
diff --git a/arch/i386/math-emu/fpu_aux.c b/arch/i386/math-emu/fpu_aux.c
new file mode 100644
index 000000000..0d35fe19b
--- /dev/null
+++ b/arch/i386/math-emu/fpu_aux.c
@@ -0,0 +1,184 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_aux.c                                                                |
+ |                                                                           |
+ | Code to implement some of the FPU auxiliary instructions.                 |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "control_w.h"
+
+
+static void fnop(void)
+{ /* Deliberately empty: handler for op-codes that need no action here. */
+}
+
+void fclex(void)
+{ /* Clear the status bits listed below in partial_status. */
+  partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision|
+		   SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op|
+		   SW_Invalid);
+  no_ip_update = 1;   /* math_emulate() then leaves instruction_address alone */
+}
+
+/* Reset the emulated FPU state.  Needs to be externally visible. */
+void finit(void)
+{
+  int r;
+  control_word = 0x037f;
+  partial_status = 0;
+  top = 0;            /* We don't keep top in the status word internally. */
+  for (r = 0; r < 8; r++)
+    {
+      regs[r].tag = TW_Empty;   /* only the tags are reset, not the contents */
+    }
+  /* The behaviour is different to that detailed in
+     Section 15.1.6 of the Intel manual */
+  operand_address.offset = 0;
+  operand_address.selector = 0;
+  instruction_address.offset = 0;
+  instruction_address.selector = 0;
+  instruction_address.opcode = 0;
+  no_ip_update = 1;   /* math_emulate() then leaves instruction_address alone */
+}
+
+/*
+ * These are nops on the i387, so they are all aliases for fnop.
+ */
+#define feni fnop
+#define fdisi fnop
+#define fsetpm fnop
+/* Handlers selected by FPU_rm (0..7) in finit_(). */
+static FUNC const finit_table[] = {
+  feni, fdisi, fclex, finit,
+  fsetpm, FPU_illegal, FPU_illegal, FPU_illegal
+};
+
+void finit_(void)
+{
+  (finit_table[FPU_rm])();   /* dispatch on FPU_rm; the table has 8 entries */
+}
+
+
+static void fstsw_ax(void)
+{
+  *(short *) &FPU_EAX = status_word();   /* 16-bit store at FPU_EAX's address */
+  no_ip_update = 1;   /* math_emulate() then leaves instruction_address alone */
+}
+
+static FUNC const fstsw_table[] = {   /* indexed by FPU_rm; only 0 is legal */
+  fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal,
+  FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
+};
+
+void fstsw_(void)
+{
+  (fstsw_table[FPU_rm])();   /* dispatch on FPU_rm; the table has 8 entries */
+}
+
+
+static FUNC const fp_nop_table[] = {   /* indexed by FPU_rm; only 0 is legal */
+  fnop, FPU_illegal, FPU_illegal, FPU_illegal,
+  FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
+};
+
+void fp_nop(void)
+{
+  (fp_nop_table[FPU_rm])();   /* dispatch on FPU_rm; the table has 8 entries */
+}
+
+
+void fld_i_(void)
+{
+  FPU_REG *st_new_ptr;
+
+  if ( STACK_OVERFLOW )   /* the macro also sets st_new_ptr = &st(-1) */
+    { stack_overflow(); return; }
+
+  /* fld st(i): copy st(i) into the slot below the top, then push */
+  if ( NOT_EMPTY(FPU_rm) )
+    { reg_move(&st(FPU_rm), st_new_ptr); push(); }
+  else
+    {
+      if ( control_word & CW_Invalid )
+	{
+	  /* The masked response */
+	  stack_underflow();
+	}
+      else
+	EXCEPTION(EX_StackUnder);
+    }
+
+}
+
+
+void fxch_i(void)
+{
+  /* fxch st(i): exchange st(0) and st(i), handling empty registers here */
+  FPU_REG t;
+  register FPU_REG *sti_ptr = &st(FPU_rm), *st0_ptr = &st(0);
+
+  if ( st0_ptr->tag == TW_Empty )
+    {
+      if ( sti_ptr->tag == TW_Empty )   /* both registers are empty */
+	{
+	  stack_underflow();
+	  stack_underflow_i(FPU_rm);
+	  return;
+	}
+      if ( control_word & CW_Invalid )   /* only st(0) is empty */
+	reg_move(sti_ptr, st0_ptr);   /* Masked response */
+      stack_underflow_i(FPU_rm);
+      return;
+    }
+  if ( sti_ptr->tag == TW_Empty )   /* only st(i) is empty */
+    {
+      if ( control_word & CW_Invalid )
+	reg_move(st0_ptr, sti_ptr);   /* Masked response */
+      stack_underflow();
+      return;
+    }
+  clear_C1();
+  reg_move(st0_ptr, &t);   /* three-way swap through the temporary t */
+  reg_move(sti_ptr, st0_ptr);
+  reg_move(&t, sti_ptr);
+}
+
+
+void ffree_(void)
+{
+  /* ffree st(i): mark st(i) empty; the register contents are not touched */
+  st(FPU_rm).tag = TW_Empty;
+}
+
+
+void ffreep(void)
+{
+  /* ffree st(i) + pop - unofficial code (see _df_c0_ in fpu_entry.c) */
+  st(FPU_rm).tag = TW_Empty;
+  pop();
+}
+
+
+void fst_i_(void)
+{
+  /* fst st(i): copy st(0) to st(i); type _REG0_, so st(0) is pre-checked */
+  reg_move(&st(0), &st(FPU_rm));
+}
+
+
+void fstp_i(void)
+{
+  /* fstp st(i): copy st(0) to st(i), then pop; st(0) is pre-checked (_REG0_) */
+  reg_move(&st(0), &st(FPU_rm));
+  pop();
+}
+
diff --git a/arch/i386/math-emu/fpu_emu.h b/arch/i386/math-emu/fpu_emu.h
new file mode 100644
index 000000000..9d2c5dd13
--- /dev/null
+++ b/arch/i386/math-emu/fpu_emu.h
@@ -0,0 +1,171 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_emu.h                                                                |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+
+#ifndef _FPU_EMU_H_
+#define _FPU_EMU_H_
+
+/*
+ * Define DENORM_OPERAND to make the emulator detect denormals
+ * and use the denormal flag of the status word. Note: this only
+ * affects the flag and corresponding interrupt, the emulator
+ * will always generate denormals and operate upon them as required.
+ */
+#define DENORM_OPERAND
+
+/*
+ * Define PECULIAR_486 to get a closer approximation to 80486 behaviour,
+ * rather than behaviour which appears to be cleaner.
+ * This is a matter of opinion: for all I know, the 80486 may simply
+ * be complying with the IEEE spec. Maybe one day I'll get to see the
+ * spec...
+ */
+#define PECULIAR_486
+
+#ifdef __ASSEMBLER__
+#include "fpu_asm.h"
+#define	Const(x)	$##x
+#else
+#define	Const(x)	x
+#endif
+
+#define EXP_BIAS	Const(0)
+#define EXP_OVER	Const(0x4000)    /* smallest invalid large exponent */
+#define	EXP_UNDER	Const(-0x3fff)   /* largest invalid small exponent */
+#define EXP_Infinity    EXP_OVER
+#define EXP_NaN         EXP_OVER
+
+#define SIGN_POS	Const(0)
+#define SIGN_NEG	Const(1)
+
+/* Keep the order TW_Valid, TW_Zero, TW_Denormal */
+#define TW_Valid	Const(0)	/* valid */
+#define TW_Zero		Const(1)	/* zero */
+/* The following fold to 2 (Special) in the Tag Word */
+/* #define TW_Denormal     Const(4) */       /* De-normal */
+#define TW_Infinity	Const(5)	/* + or - infinity */
+#define	TW_NaN		Const(6)	/* Not a Number */
+
+#define TW_Empty	Const(7)	/* empty */
+
+
+#ifndef __ASSEMBLER__
+
+#include <linux/math_emu.h>
+#include <linux/linkage.h>
+
+/*
+#define RE_ENTRANT_CHECKING
+ */
+
+#ifdef RE_ENTRANT_CHECKING
+extern char emulating;
+#  define RE_ENTRANT_CHECK_OFF emulating = 0
+#  define RE_ENTRANT_CHECK_ON emulating = 1
+#else   /* checking disabled: both macros expand to nothing */
+#  define RE_ENTRANT_CHECK_OFF
+#  define RE_ENTRANT_CHECK_ON
+#endif /* RE_ENTRANT_CHECKING */
+
+#define FWAIT_OPCODE 0x9b
+#define OP_SIZE_PREFIX 0x66
+#define ADDR_SIZE_PREFIX 0x67
+#define PREFIX_CS 0x2e
+#define PREFIX_DS 0x3e
+#define PREFIX_ES 0x26
+#define PREFIX_SS 0x36
+#define PREFIX_FS 0x64
+#define PREFIX_GS 0x65
+#define PREFIX_REPE 0xf3
+#define PREFIX_REPNE 0xf2
+#define PREFIX_LOCK 0xf0
+#define PREFIX_CS_ 1
+#define PREFIX_DS_ 2
+#define PREFIX_ES_ 3
+#define PREFIX_FS_ 4
+#define PREFIX_GS_ 5
+#define PREFIX_SS_ 6
+#define PREFIX_DEFAULT 7
+
+struct address {
+  unsigned int offset;
+  unsigned int selector:16;
+  unsigned int opcode:11;
+  unsigned int empty:5;
+};
+typedef void (*FUNC)(void);
+typedef struct fpu_reg FPU_REG;
+typedef void (*FUNC_ST0)(FPU_REG *st0_ptr);
+typedef struct { unsigned char address_size, operand_size, segment; }
+        overrides;
+/* This structure is 32 bits: */
+typedef struct { overrides override;
+		 unsigned char default_mode; } fpu_addr_modes;
+/* PROTECTED has a restricted meaning in the emulator; it is used
+   to signal that the emulator needs to do special things to ensure
+   that protection is respected in a segmented model. */
+#define PROTECTED 4
+#define SIXTEEN   1         /* We rely upon this being 1 (true) */
+#define VM86      SIXTEEN
+#define PM16      (SIXTEEN | PROTECTED)
+#define SEG32     PROTECTED
+extern unsigned char const data_sizes_16[32];
+
+#define	st(x)	( regs[((top+(x)) &7 )] )   /* register x places from top, mod 8 */
+
+#define	STACK_OVERFLOW	(st_new_ptr = &st(-1), st_new_ptr->tag != TW_Empty)
+#define	NOT_EMPTY(i)	(st(i).tag != TW_Empty)
+#define	NOT_EMPTY_ST0	(st0_tag ^ TW_Empty)
+
+#define pop()	{ regs[(top++ & 7 )].tag = TW_Empty; }
+#define poppop() { regs[((top + 1) & 7 )].tag \
+		     = regs[(top & 7 )].tag = TW_Empty; \
+		   top += 2; }
+
+/* push() does not affect the tags */
+#define push()	{ top--; }
+
+
+#define reg_move(x, y) { \
+		 *(short *)&((y)->sign) = *(short *)&((x)->sign); \
+		 *(long *)&((y)->exp) = *(long *)&((x)->exp); \
+		 *(long long *)&((y)->sigl) = *(long long *)&((x)->sigl); }
+
+#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] )
+
+
+/*----- Prototypes for functions written in assembler -----*/
+/* extern void reg_move(FPU_REG *a, FPU_REG *b); */
+
+asmlinkage void normalize(FPU_REG *x);
+asmlinkage void normalize_nuo(FPU_REG *x);
+asmlinkage int reg_div(FPU_REG const *arg1, FPU_REG const *arg2,
+		       FPU_REG *answ, unsigned int control_w);
+asmlinkage int reg_u_sub(FPU_REG const *arg1, FPU_REG const *arg2,
+			 FPU_REG *answ, unsigned int control_w);
+asmlinkage int reg_u_mul(FPU_REG const *arg1, FPU_REG const *arg2,
+			 FPU_REG *answ, unsigned int control_w);
+asmlinkage int reg_u_div(FPU_REG const *arg1, FPU_REG const *arg2,
+			 FPU_REG *answ, unsigned int control_w);
+asmlinkage int reg_u_add(FPU_REG const *arg1, FPU_REG const *arg2,
+			 FPU_REG *answ, unsigned int control_w);
+asmlinkage int wm_sqrt(FPU_REG *n, unsigned int control_w);
+asmlinkage unsigned	shrx(void *l, unsigned x);
+asmlinkage unsigned	shrxs(void *v, unsigned x);
+asmlinkage unsigned long div_small(unsigned long long *x, unsigned long y);
+asmlinkage void round_reg(FPU_REG *arg, unsigned int extent,
+		      unsigned int control_w);
+
+#ifndef MAKING_PROTO
+#include "fpu_proto.h"
+#endif
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _FPU_EMU_H_ */
diff --git a/arch/i386/math-emu/fpu_entry.c b/arch/i386/math-emu/fpu_entry.c
new file mode 100644
index 000000000..b2777a722
--- /dev/null
+++ b/arch/i386/math-emu/fpu_entry.c
@@ -0,0 +1,690 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_entry.c                                                              |
+ |                                                                           |
+ | The entry function for wm-FPU-emu                                         |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | See the files "README" and "COPYING" for further copyright and warranty   |
+ | information.                                                              |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Note:                                                                     |
+ |    The file contains code which accesses user memory.                     |
+ |    Emulator static data may change when user memory is accessed, due to   |
+ |    other processes using the emulator while swapping is in progress.      |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | math_emulate() is the sole entry point for wm-FPU-emu                     |
+ +---------------------------------------------------------------------------*/
+
+#include <linux/signal.h>
+
+#include <asm/segment.h>
+
+#include "fpu_system.h"
+#include "fpu_emu.h"
+#include "exception.h"
+#include "control_w.h"
+#include "status_w.h"
+
+#define __BAD__ FPU_illegal   /* Illegal on an 80486, causes SIGILL */
+
+#ifndef NO_UNDOC_CODE    /* Un-documented FPU op-codes supported by default. */
+
+/* WARNING: These codes are not documented by Intel in their 80486 manual
+   and may not work on FPU clones or later Intel FPUs. */
+
+/* Changes to support the un-doc codes provided by Linus Torvalds. */
+
+#define _d9_d8_ fstp_i    /* unofficial code (19) */
+#define _dc_d0_ fcom_st   /* unofficial code (14) */
+#define _dc_d8_ fcompst   /* unofficial code (1c) */
+#define _dd_c8_ fxch_i    /* unofficial code (0d) */
+#define _de_d0_ fcompst   /* unofficial code (16) */
+#define _df_c0_ ffreep    /* unofficial code (07) ffree + pop */
+#define _df_c8_ fxch_i    /* unofficial code (0f) */
+#define _df_d0_ fstp_i    /* unofficial code (17) */
+#define _df_d8_ fstp_i    /* unofficial code (1f) */
+/* Indexed by (FPU_modrm & 0x38) | (byte1 & 7); see math_emulate(). */
+static FUNC const st_instr_table[64] = {
+  fadd__,   fld_i_,  __BAD__, __BAD__, fadd_i,  ffree_,  faddp_,  _df_c0_,
+  fmul__,   fxch_i,  __BAD__, __BAD__, fmul_i,  _dd_c8_, fmulp_,  _df_c8_,
+  fcom_st,  fp_nop,  __BAD__, __BAD__, _dc_d0_, fst_i_,  _de_d0_, _df_d0_,
+  fcompst,  _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i,  fcompp,  _df_d8_,
+  fsub__,   fp_etc,  __BAD__, finit_,  fsubri,  fucom_,  fsubrp,  fstsw_,
+  fsubr_,   fconst,  fucompp, __BAD__, fsub_i,  fucomp,  fsubp_,  __BAD__,
+  fdiv__,   trig_a,  __BAD__, __BAD__, fdivri,  __BAD__, fdivrp,  __BAD__,
+  fdivr_,   trig_b,  __BAD__, __BAD__, fdiv_i,  __BAD__, fdivp_,  __BAD__,
+};
+
+#else     /* Support only documented FPU op-codes */
+
+static FUNC const st_instr_table[64] = {
+  fadd__,   fld_i_,  __BAD__, __BAD__, fadd_i,  ffree_,  faddp_,  __BAD__,
+  fmul__,   fxch_i,  __BAD__, __BAD__, fmul_i,  __BAD__, fmulp_,  __BAD__,
+  fcom_st,  fp_nop,  __BAD__, __BAD__, __BAD__, fst_i_,  __BAD__, __BAD__,
+  fcompst,  __BAD__, __BAD__, __BAD__, __BAD__, fstp_i,  fcompp,  __BAD__,
+  fsub__,   fp_etc,  __BAD__, finit_,  fsubri,  fucom_,  fsubrp,  fstsw_,
+  fsubr_,   fconst,  fucompp, __BAD__, fsub_i,  fucomp,  fsubp_,  __BAD__,
+  fdiv__,   trig_a,  __BAD__, __BAD__, fdivri,  __BAD__, fdivrp,  __BAD__,
+  fdivr_,   trig_b,  __BAD__, __BAD__, fdiv_i,  __BAD__, fdivp_,  __BAD__,
+};
+
+#endif /* NO_UNDOC_CODE */
+
+
+#define _NONE_ 0   /* Take no special action */
+#define _REG0_ 1   /* Need to check for not empty st(0) */
+#define _REGI_ 2   /* Need to check for not empty st(0) and st(rm) */
+#define _REGi_ 0   /* Uses st(rm); same value as _NONE_, so no check is made */
+#define _PUSH_ 3   /* Need to check for space to push (not used in type_table) */
+#define _null_ 4   /* Function illegal or not implemented */
+#define _REGIi 5   /* Uses st(0) and st(rm), result to st(rm) */
+#define _REGIp 6   /* Uses st(0) and st(rm), result to st(rm) then pop */
+#define _REGIc 0   /* Compare st(0) and st(rm); same value as _NONE_ */
+#define _REGIn 0   /* Uses st(0) and st(rm), but handle checks later (== _NONE_) */
+
+#ifndef NO_UNDOC_CODE
+
+/* Un-documented FPU op-codes supported by default. (see above) */
+/* Same indexing as st_instr_table; selects the checks made before the call. */
+static unsigned char const type_table[64] = {
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
+  _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
+  _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
+  _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
+  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
+  _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
+};
+
+#else     /* Support only documented FPU op-codes */
+
+static unsigned char const type_table[64] = {
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
+  _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+  _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
+  _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
+  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
+  _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
+};
+
+#endif /* NO_UNDOC_CODE */
+
+
+#ifdef RE_ENTRANT_CHECKING
+char emulating=0;   /* set and cleared by RE_ENTRANT_CHECK_ON / _OFF */
+#endif /* RE_ENTRANT_CHECKING */
+
+static int valid_prefix(unsigned char *Byte, unsigned char **fpu_eip,
+			overrides *override);
+
+asmlinkage void math_emulate(long arg)
+{
+  unsigned char  FPU_modrm, byte1;
+  unsigned short code;
+  fpu_addr_modes addr_modes;
+  int unmasked;
+  FPU_REG loaded_data;
+  void *data_address;
+  struct address data_sel_off;
+  struct address entry_sel_off;
+  unsigned long code_base = 0;
+  unsigned long code_limit = 0;  /* Initialized to stop compiler warnings */
+  char	       st0_tag;
+  FPU_REG      *st0_ptr;
+  struct desc_struct code_descriptor;
+
+#ifdef RE_ENTRANT_CHECKING
+  if ( emulating )
+    {
+      printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n");
+    }
+  RE_ENTRANT_CHECK_ON;
+#endif RE_ENTRANT_CHECKING
+
+  if (!current->used_math)
+    {
+      int i;
+      for ( i = 0; i < 8; i++ )
+	{
+	  /* Make sure that the registers are compatible
+	     with the assumptions of the emulator. */
+	  regs[i].exp = 0;
+	  regs[i].sigh = 0x80000000;
+	}
+      finit();
+      current->used_math = 1;
+    }
+
+  SETUP_DATA_AREA(arg);
+
+  FPU_ORIG_EIP = FPU_EIP;
+
+  if ( (FPU_EFLAGS & 0x00020000) != 0 )
+    {
+      /* Virtual 8086 mode */
+      addr_modes.default_mode = VM86;
+      FPU_EIP += code_base = FPU_CS << 4;
+      code_limit = code_base + 0xffff;  /* Assumes code_base <= 0xffff0000 */
+    }
+  else if ( FPU_CS == USER_CS && FPU_DS == USER_DS )
+    {
+      addr_modes.default_mode = 0;
+    }
+  else if ( FPU_CS == KERNEL_CS )
+    {
+      printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP);
+      panic("Math emulation needed in kernel");
+    }
+  else
+    {
+
+      if ( (FPU_CS & 4) != 4 )   /* Must be in the LDT */
+	{
+	  /* Can only handle segmented addressing via the LDT
+	     for now, and it must be 16 bit */
+	  printk("FPU emulator: Unsupported addressing mode\n");
+	  math_abort(FPU_info, SIGILL);
+	}
+
+      if ( SEG_D_SIZE(code_descriptor = LDT_DESCRIPTOR(FPU_CS)) )
+	{
+	  /* The above test may be wrong, the book is not clear */
+	  /* Segmented 32 bit protected mode */
+	  addr_modes.default_mode = SEG32;
+	}
+      else
+	{
+	  /* 16 bit protected mode */
+	  addr_modes.default_mode = PM16;
+	}
+      FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
+      code_limit = code_base
+	+ (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor)
+	  - 1;
+      if ( code_limit < code_base ) code_limit = 0xffffffff;
+    }
+
+  FPU_lookahead = 1;
+  if (current->flags & PF_PTRACED)
+    FPU_lookahead = 0;
+
+  if ( !valid_prefix(&byte1, (unsigned char **)&FPU_EIP,
+		     &addr_modes.override) )
+    {
+      RE_ENTRANT_CHECK_OFF;
+      printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n"
+	     "FPU emulator: self-modifying code! (emulation impossible)\n",
+	     byte1);
+      RE_ENTRANT_CHECK_ON;
+      EXCEPTION(EX_INTERNAL|0x126);
+      math_abort(FPU_info,SIGILL);
+    }
+
+do_another_FPU_instruction:
+
+  no_ip_update = 0;
+
+  FPU_EIP++;  /* We have fetched the prefix and first code bytes. */
+
+  if ( addr_modes.default_mode )
+    {
+      /* This checks for the minimum instruction bytes.
+	 We also need to check any extra (address mode) code access. */
+      if ( FPU_EIP > code_limit )
+	math_abort(FPU_info,SIGSEGV);
+    }
+
+  if ( (byte1 & 0xf8) != 0xd8 )
+    {
+      if ( byte1 == FWAIT_OPCODE )
+	{
+	  if (partial_status & SW_Summary)
+	    goto do_the_FPU_interrupt;
+	  else
+	    goto FPU_fwait_done;
+	}
+#ifdef PARANOID
+      EXCEPTION(EX_INTERNAL|0x128);
+      math_abort(FPU_info,SIGILL);
+#endif PARANOID
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_code_verify_area(1);
+  FPU_modrm = get_fs_byte((unsigned char *) FPU_EIP);
+  RE_ENTRANT_CHECK_ON;
+  FPU_EIP++;
+
+  if (partial_status & SW_Summary)
+    {
+      /* Ignore the error for now if the current instruction is a no-wait
+	 control instruction */
+      /* The 80486 manual contradicts itself on this topic,
+	 but a real 80486 uses the following instructions:
+	 fninit, fnstenv, fnsave, fnstsw, fnstcw, fnclex.
+       */
+      code = (FPU_modrm << 8) | byte1;
+      if ( ! ( (((code & 0xf803) == 0xe003) ||    /* fnclex, fninit, fnstsw */
+		(((code & 0x3003) == 0x3001) &&   /* fnsave, fnstcw, fnstenv,
+						     fnstsw */
+		 ((code & 0xc000) != 0xc000))) ) )
+	{
+	  /*
+	   *  We need to simulate the action of the kernel to FPU
+	   *  interrupts here.
+	   *  Currently, the "real FPU" part of the kernel (0.99.10)
+	   *  clears the exception flags, sets the registers to empty,
+	   *  and passes information back to the interrupted process
+	   *  via the cs selector and operand selector, so we do the same.
+	   */
+	do_the_FPU_interrupt:
+	  instruction_address.selector = status_word();
+      	  operand_address.selector = tag_word();
+	  partial_status = 0;
+	  top = 0;
+	  {
+	    int r;
+	    for (r = 0; r < 8; r++)
+	      {
+		regs[r].tag = TW_Empty;
+	      }
+	  }
+
+	  FPU_EIP = FPU_ORIG_EIP;	/* Point to current FPU instruction. */
+
+	  RE_ENTRANT_CHECK_OFF;
+	  current->tss.trap_no = 16;
+	  current->tss.error_code = 0;
+	  send_sig(SIGFPE, current, 1);
+	  return;
+	}
+    }
+
+  entry_sel_off.offset = FPU_ORIG_EIP;
+  entry_sel_off.selector = FPU_CS;
+  entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
+
+  FPU_rm = FPU_modrm & 7;
+
+  if ( FPU_modrm < 0300 )
+    {
+      /* All of these instructions use the mod/rm byte to get a data address */
+
+      if ( (addr_modes.default_mode & SIXTEEN)
+	  ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) )
+	data_address = get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off,
+				      addr_modes);
+      else
+	data_address = get_address(FPU_modrm, &FPU_EIP, &data_sel_off,
+				   addr_modes);
+
+      if ( addr_modes.default_mode )
+	{
+	  if ( FPU_EIP-1 > code_limit )
+	    math_abort(FPU_info,SIGSEGV);
+	}
+
+      if ( !(byte1 & 1) )
+	{
+	  unsigned short status1 = partial_status;
+
+	  st0_ptr = &st(0);
+	  st0_tag = st0_ptr->tag;
+
+	  /* Stack underflow has priority */
+	  if ( NOT_EMPTY_ST0 )
+	    {
+	      if ( addr_modes.default_mode & PROTECTED )
+		{
+		  /* This table works for 16 and 32 bit protected mode */
+		  if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] )
+		    math_abort(FPU_info,SIGSEGV);
+		}
+
+	      unmasked = 0;  /* Do this here to stop compiler warnings. */
+	      switch ( (byte1 >> 1) & 3 )
+		{
+		case 0:
+		  unmasked = reg_load_single((float *)data_address,
+					     &loaded_data);
+		  break;
+		case 1:
+		  reg_load_int32((long *)data_address, &loaded_data);
+		  break;
+		case 2:
+		  unmasked = reg_load_double((double *)data_address,
+					     &loaded_data);
+		  break;
+		case 3:
+		  reg_load_int16((short *)data_address, &loaded_data);
+		  break;
+		}
+	      
+	      /* No more access to user memory, it is safe
+		 to use static data now */
+
+	      /* NaN operands have the next priority. */
+	      /* We have to delay looking at st(0) until after
+		 loading the data, because that data might contain an SNaN */
+	      if ( (st0_tag == TW_NaN) ||
+		  (loaded_data.tag == TW_NaN) )
+		{
+		  /* Restore the status word; we might have loaded a
+		     denormal. */
+		  partial_status = status1;
+		  if ( (FPU_modrm & 0x30) == 0x10 )
+		    {
+		      /* fcom or fcomp */
+		      EXCEPTION(EX_Invalid);
+		      setcc(SW_C3 | SW_C2 | SW_C0);
+		      if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
+			pop();             /* fcomp, masked, so we pop. */
+		    }
+		  else
+		    {
+#ifdef PECULIAR_486
+		      /* This is not really needed, but gives behaviour
+			 identical to an 80486 */
+		      if ( (FPU_modrm & 0x28) == 0x20 )
+			/* fdiv or fsub */
+			real_2op_NaN(&loaded_data, st0_ptr,
+				     st0_ptr);
+		      else
+#endif PECULIAR_486
+			/* fadd, fdivr, fmul, or fsubr */
+			real_2op_NaN(st0_ptr, &loaded_data,
+				     st0_ptr);
+		    }
+		  goto reg_mem_instr_done;
+		}
+
+	      if ( unmasked && !((FPU_modrm & 0x30) == 0x10) )
+		{
+		  /* Is not a comparison instruction. */
+		  if ( (FPU_modrm & 0x38) == 0x38 )
+		    {
+		      /* fdivr */
+		      if ( (st0_tag == TW_Zero) &&
+			  (loaded_data.tag == TW_Valid) )
+			{
+			  if ( divide_by_zero(loaded_data.sign,
+					      st0_ptr) )
+			    {
+			      /* We use the fact here that the unmasked
+				 exception in the loaded data was for a
+				 denormal operand */
+			      /* Restore the state of the denormal op bit */
+			      partial_status &= ~SW_Denorm_Op;
+			      partial_status |= status1 & SW_Denorm_Op;
+			    }
+			}
+		    }
+		  goto reg_mem_instr_done;
+		}
+
+	      switch ( (FPU_modrm >> 3) & 7 )
+		{
+		case 0:         /* fadd */
+		  clear_C1();
+		  reg_add(st0_ptr, &loaded_data, st0_ptr,
+			  control_word);
+		  break;
+		case 1:         /* fmul */
+		  clear_C1();
+		  reg_mul(st0_ptr, &loaded_data, st0_ptr,
+			  control_word);
+		  break;
+		case 2:         /* fcom */
+		  compare_st_data(&loaded_data);
+		  break;
+		case 3:         /* fcomp */
+		  if ( !compare_st_data(&loaded_data) && !unmasked )
+		    pop();
+		  break;
+		case 4:         /* fsub */
+		  clear_C1();
+		  reg_sub(st0_ptr, &loaded_data, st0_ptr,
+			  control_word);
+		  break;
+		case 5:         /* fsubr */
+		  clear_C1();
+		  reg_sub(&loaded_data, st0_ptr, st0_ptr,
+			  control_word);
+		  break;
+		case 6:         /* fdiv */
+		  clear_C1();
+		  reg_div(st0_ptr, &loaded_data, st0_ptr,
+			  control_word);
+		  break;
+		case 7:         /* fdivr */
+		  clear_C1();
+		  if ( st0_tag == TW_Zero )
+		    partial_status = status1;  /* Undo any denorm tag,
+					       zero-divide has priority. */
+		  reg_div(&loaded_data, st0_ptr, st0_ptr,
+			  control_word);
+		  break;
+		}
+	    }
+	  else
+	    {
+	      if ( (FPU_modrm & 0x30) == 0x10 )
+		{
+		  /* The instruction is fcom or fcomp */
+		  EXCEPTION(EX_StackUnder);
+		  setcc(SW_C3 | SW_C2 | SW_C0);
+		  if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) )
+		    pop();             /* fcomp */
+		}
+	      else
+		stack_underflow();
+	    }
+	reg_mem_instr_done:
+	  operand_address = data_sel_off;
+	}
+      else
+	{
+	  if ( !(no_ip_update =
+		 load_store_instr(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1,
+				  addr_modes, data_address)) )
+	    {
+	      operand_address = data_sel_off;
+	    }
+	}
+
+    }
+  else
+    {
+      /* None of these instructions access user memory */
+      unsigned char instr_index = (FPU_modrm & 0x38) | (byte1 & 7);
+
+#ifdef PECULIAR_486
+      /* This is supposed to be undefined, but a real 80486 seems
+	 to do this: */
+      operand_address.offset = 0;
+      operand_address.selector = FPU_DS;
+#endif PECULIAR_486
+
+      st0_ptr = &st(0);
+      st0_tag = st0_ptr->tag;
+      switch ( type_table[(int) instr_index] )
+	{
+	case _NONE_:   /* also _REGIc: _REGIn */
+	  break;
+	case _REG0_:
+	  if ( !NOT_EMPTY_ST0 )
+	    {
+	      stack_underflow();
+	      goto FPU_instruction_done;
+	    }
+	  break;
+	case _REGIi:
+	  if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
+	    {
+	      stack_underflow_i(FPU_rm);
+	      goto FPU_instruction_done;
+	    }
+	  break;
+	case _REGIp:
+	  if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
+	    {
+	      stack_underflow_pop(FPU_rm);
+	      goto FPU_instruction_done;
+	    }
+	  break;
+	case _REGI_:
+	  if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) )
+	    {
+	      stack_underflow();
+	      goto FPU_instruction_done;
+	    }
+	  break;
+	case _PUSH_:     /* Only used by the fld st(i) instruction */
+	  break;
+	case _null_:
+	  FPU_illegal();
+	  goto FPU_instruction_done;
+	default:
+	  EXCEPTION(EX_INTERNAL|0x111);
+	  goto FPU_instruction_done;
+	}
+      (*st_instr_table[(int) instr_index])();
+
+FPU_instruction_done:
+      ;
+    }
+
+  if ( ! no_ip_update )
+    instruction_address = entry_sel_off;
+
+FPU_fwait_done:
+
+#ifdef DEBUG
+  RE_ENTRANT_CHECK_OFF;
+  emu_printall();
+  RE_ENTRANT_CHECK_ON;
+#endif DEBUG
+
+  if (FPU_lookahead && !need_resched)
+    {
+      FPU_ORIG_EIP = FPU_EIP - code_base;
+      if ( valid_prefix(&byte1, (unsigned char **)&FPU_EIP,
+			&addr_modes.override) )
+	goto do_another_FPU_instruction;
+    }
+
+  if ( addr_modes.default_mode )
+    FPU_EIP -= code_base;
+
+  RE_ENTRANT_CHECK_OFF;
+}
+
+
+/* Support for prefix bytes is not yet complete. To properly handle
+   all prefix bytes, further changes are needed in the emulator code
+   which accesses user address space. Access to separate segments is
+   important for msdos emulation. */
+static int valid_prefix(unsigned char *Byte, unsigned char **fpu_eip,
+			overrides *override)
+{
+  unsigned char byte;
+  unsigned char *ip = *fpu_eip;
+
+  *override = (overrides) { 0, 0, PREFIX_DEFAULT };       /* defaults */
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_code_verify_area(1);
+  byte = get_fs_byte(ip);
+  RE_ENTRANT_CHECK_ON;
+
+  while ( 1 )
+    {
+      switch ( byte )
+	{
+	case ADDR_SIZE_PREFIX:
+	  override->address_size = ADDR_SIZE_PREFIX;
+	  goto do_next_byte;
+
+	case OP_SIZE_PREFIX:
+	  override->operand_size = OP_SIZE_PREFIX;
+	  goto do_next_byte;
+
+	case PREFIX_CS:
+	  override->segment = PREFIX_CS_;
+	  goto do_next_byte;
+	case PREFIX_ES:
+	  override->segment = PREFIX_ES_;
+	  goto do_next_byte;
+	case PREFIX_SS:
+	  override->segment = PREFIX_SS_;
+	  goto do_next_byte;
+	case PREFIX_FS:
+	  override->segment = PREFIX_FS_;
+	  goto do_next_byte;
+	case PREFIX_GS:
+	  override->segment = PREFIX_GS_;
+	  goto do_next_byte;
+	case PREFIX_DS:
+	  override->segment = PREFIX_DS_;
+	  goto do_next_byte;
+
+/* lock is not a valid prefix for FPU instructions,
+   let the cpu handle it to generate a SIGILL. */
+/*	case PREFIX_LOCK: */
+
+	  /* rep.. prefixes have no meaning for FPU instructions */
+	case PREFIX_REPE:
+	case PREFIX_REPNE:
+
+	do_next_byte:
+	  ip++;
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_code_verify_area(1);
+	  byte = get_fs_byte(ip);
+	  RE_ENTRANT_CHECK_ON;
+	  break;
+	case FWAIT_OPCODE:
+	  *Byte = byte;
+	  return 1;
+	default:
+	  if ( (byte & 0xf8) == 0xd8 )
+	    {
+	      *Byte = byte;
+	      *fpu_eip = ip;
+	      return 1;
+	    }
+	  else
+	    {
+	      /* Not a valid sequence of prefix bytes followed by
+		 an FPU instruction. */
+	      *Byte = byte;  /* Needed for error message. */
+	      return 0;
+	    }
+	}
+    }
+}
+
+
+/* Abandon emulation of the current instruction and send `signal' to the
+   current task.
+
+   info    the frame recorded by SETUP_DATA_AREA() (callers in this
+	   emulator pass FPU_info).
+   signal  signal number handed to send_sig().
+
+   This function is not expected to return to its caller: it reloads the
+   stack pointer and executes `ret'.  Reaching the code after the asm
+   statement is reported as an error when PARANOID is defined. */
+void math_abort(struct info * info, unsigned int signal)
+{
+	/* Put the saved instruction pointer back to its value from before
+	   this instruction was decoded. */
+	FPU_EIP = FPU_ORIG_EIP;
+	current->tss.trap_no = 16;
+	current->tss.error_code = 0;
+	send_sig(signal,current,1);
+	RE_ENTRANT_CHECK_OFF;
+	/* Point %esp at the word just below `info' and return through it.
+	   NOTE(review): presumably that word is the return address of
+	   math_emulate() -- confirm against the entry code. */
+	__asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4));
+#ifdef PARANOID
+      printk("ERROR: wm-FPU-emu math_abort failed!\n");
+#endif PARANOID
+}
diff --git a/arch/i386/math-emu/fpu_etc.c b/arch/i386/math-emu/fpu_etc.c
new file mode 100644
index 000000000..20e3294ca
--- /dev/null
+++ b/arch/i386/math-emu/fpu_etc.c
@@ -0,0 +1,129 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_etc.c                                                                |
+ |                                                                           |
+ | Implement a few FPU instructions.                                         |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "reg_constant.h"
+
+
+/* fchs: flip the sign of st(0) and clear C1.  An empty st(0) is
+   reported as a stack underflow instead. */
+static void fchs(FPU_REG *st0_ptr)
+{
+  if ( st0_ptr->tag == TW_Empty )
+    {
+      stack_underflow();
+      return;
+    }
+
+  st0_ptr->sign ^= (SIGN_POS ^ SIGN_NEG);
+  clear_C1();
+}
+
+/* fabs: force the sign of st(0) to positive and clear C1.  An empty
+   st(0) is reported as a stack underflow instead. */
+static void fabs(FPU_REG *st0_ptr)
+{
+  if ( st0_ptr->tag == TW_Empty )
+    {
+      stack_underflow();
+      return;
+    }
+
+  st0_ptr->sign = SIGN_POS;
+  clear_C1();
+}
+
+
+/* ftst: classify st(0) relative to zero and report the outcome in the
+   condition codes:
+     zero                  C3
+     positive number/inf   none
+     negative number/inf   C0
+     NaN, empty, bad tag   C0|C2|C3 plus the matching exception. */
+static void ftst_(FPU_REG *st0_ptr)
+{
+  char tag = st0_ptr->tag;
+
+  if ( tag == TW_Zero )
+    {
+      setcc(SW_C3);
+      return;
+    }
+
+  if ( (tag == TW_Valid) || (tag == TW_Infinity) )
+    {
+      int sign_cc = SW_C0;
+
+      if ( st0_ptr->sign == SIGN_POS )
+	sign_cc = 0;
+      setcc(sign_cc);
+
+#ifdef DENORM_OPERAND
+      /* Only a valid (finite, non-zero) number can be a denormal. */
+      if ( (tag == TW_Valid) && (st0_ptr->exp <= EXP_UNDER)
+	   && (denormal_operand()) )
+	{
+#ifdef PECULIAR_486
+	  /* This is weird! */
+	  if ( st0_ptr->sign == SIGN_POS )
+	    setcc(SW_C3);
+#endif /* PECULIAR_486 */
+	}
+#endif /* DENORM_OPERAND */
+      return;
+    }
+
+  /* Everything left is not comparable. */
+  setcc(SW_C0|SW_C2|SW_C3);
+  if ( tag == TW_NaN )
+    {
+      EXCEPTION(EX_Invalid);
+    }
+  else if ( tag == TW_Empty )
+    {
+      EXCEPTION(EX_StackUnder);
+    }
+  else
+    {
+      EXCEPTION(EX_INTERNAL|0x14);
+    }
+}
+
+/* fxam: report the class of st(0) in the condition codes.  C1 carries
+   the sign; C3/C2/C0 encode the tag:
+     empty     C3|C0         zero      C3
+     valid     C2            denormal  C2|C3
+     NaN       C0            infinity  C2|C0
+   Any other tag leaves C3/C2/C0 clear. */
+static void fxam(FPU_REG *st0_ptr)
+{
+  char tag = st0_ptr->tag;
+  int cc = 0;
+
+  if ( st0_ptr->sign == SIGN_NEG )
+    cc = SW_C1;
+
+  if ( tag == TW_Empty )
+    cc |= SW_C3|SW_C0;
+  else if ( tag == TW_Zero )
+    cc |= SW_C3;
+  else if ( tag == TW_Valid )
+    {
+      /* This will need to be changed if TW_Denormal is ever used. */
+      cc |= SW_C2;
+      if ( st0_ptr->exp <= EXP_UNDER )
+	cc |= SW_C3;  /* Denormal */
+    }
+  else if ( tag == TW_NaN )
+    cc |= SW_C0;
+  else if ( tag == TW_Infinity )
+    cc |= SW_C2|SW_C0;
+
+  setcc(cc);
+}
+
+
+/* Handlers for the instructions dispatched by fp_etc(), indexed by
+   FPU_rm.  Slots without an instruction call FPU_illegal(). */
+static FUNC_ST0 const fp_etc_table[] = {
+  fchs,
+  fabs,
+  (FUNC_ST0)FPU_illegal,
+  (FUNC_ST0)FPU_illegal,
+  ftst_,
+  fxam,
+  (FUNC_ST0)FPU_illegal,
+  (FUNC_ST0)FPU_illegal
+};
+
+/* Run the handler selected by FPU_rm on the current st(0). */
+void fp_etc()
+{
+  FUNC_ST0 handler = fp_etc_table[FPU_rm];
+
+  handler(&st(0));
+}
diff --git a/arch/i386/math-emu/fpu_proto.h b/arch/i386/math-emu/fpu_proto.h
new file mode 100644
index 000000000..b4392fe57
--- /dev/null
+++ b/arch/i386/math-emu/fpu_proto.h
@@ -0,0 +1,137 @@
+/* errors.c */
+extern void Un_impl(void);
+extern void FPU_illegal(void);
+extern void emu_printall(void);
+extern void stack_overflow(void);
+extern void stack_underflow(void);
+extern void stack_underflow_i(int i);
+extern void stack_underflow_pop(int i);
+extern int set_precision_flag(int flags);
+asmlinkage void exception(int n);
+asmlinkage int real_2op_NaN(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest);
+asmlinkage int arith_invalid(FPU_REG *dest);
+asmlinkage int divide_by_zero(int sign, FPU_REG *dest);
+asmlinkage void set_precision_flag_up(void);
+asmlinkage void set_precision_flag_down(void);
+asmlinkage int denormal_operand(void);
+asmlinkage int arith_overflow(FPU_REG *dest);
+asmlinkage int arith_underflow(FPU_REG *dest);
+
+/* fpu_arith.c */
+extern void fadd__(void);
+extern void fmul__(void);
+extern void fsub__(void);
+extern void fsubr_(void);
+extern void fdiv__(void);
+extern void fdivr_(void);
+extern void fadd_i(void);
+extern void fmul_i(void);
+extern void fsubri(void);
+extern void fsub_i(void);
+extern void fdivri(void);
+extern void fdiv_i(void);
+extern void faddp_(void);
+extern void fmulp_(void);
+extern void fsubrp(void);
+extern void fsubp_(void);
+extern void fdivrp(void);
+extern void fdivp_(void);
+
+/* fpu_aux.c */
+extern void fclex(void);
+extern void finit(void);
+extern void finit_(void);
+extern void fstsw_(void);
+extern void fp_nop(void);
+extern void fld_i_(void);
+extern void fxch_i(void);
+extern void ffree_(void);
+extern void ffreep(void);
+extern void fst_i_(void);
+extern void fstp_i(void);
+
+/* fpu_entry.c */
+asmlinkage void math_emulate(long arg);
+extern void math_abort(struct info *info, unsigned int signal);
+
+/* fpu_etc.c */
+extern void fp_etc(void);
+
+/* fpu_trig.c */
+extern void convert_l2reg(long const *arg, FPU_REG *dest);
+extern void trig_a(void);
+extern void trig_b(void);
+
+/* get_address.c */
+extern void *get_address(unsigned char FPU_modrm, unsigned long *fpu_eip,
+			 struct address *addr,
+			 fpu_addr_modes);
+extern void *get_address_16(unsigned char FPU_modrm, unsigned long *fpu_eip,
+			    struct address *addr,
+			    fpu_addr_modes);
+
+/* load_store.c */
+extern int load_store_instr(unsigned char type, fpu_addr_modes addr_modes,
+			     void *address);
+
+/* poly_2xm1.c */
+extern int poly_2xm1(FPU_REG const *arg, FPU_REG *result);
+
+/* poly_atan.c */
+extern void poly_atan(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *result);
+
+/* poly_l2.c */
+extern void poly_l2(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result);
+extern int poly_l2p1(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result);
+
+/* poly_sin.c */
+extern void poly_sine(FPU_REG const *arg, FPU_REG *result);
+extern void poly_cos(FPU_REG const *arg, FPU_REG *result);
+
+/* poly_tan.c */
+extern void poly_tan(FPU_REG const *arg, FPU_REG *result);
+
+/* reg_add_sub.c */
+extern int reg_add(FPU_REG const *a, FPU_REG const *b,
+		   FPU_REG *dest, int control_w);
+extern int reg_sub(FPU_REG const *a, FPU_REG const *b,
+		   FPU_REG *dest, int control_w);
+
+/* reg_compare.c */
+extern int compare(FPU_REG const *b);
+extern int compare_st_data(FPU_REG const *b);
+extern void fcom_st(void);
+extern void fcompst(void);
+extern void fcompp(void);
+extern void fucom_(void);
+extern void fucomp(void);
+extern void fucompp(void);
+
+/* reg_constant.c */
+extern void fconst(void);
+
+/* reg_ld_str.c */
+extern int reg_load_extended(long double *addr, FPU_REG *loaded_data);
+extern int reg_load_double(double *dfloat, FPU_REG *loaded_data);
+extern int reg_load_single(float *single, FPU_REG *loaded_data);
+extern void reg_load_int64(long long *_s, FPU_REG *loaded_data);
+extern void reg_load_int32(long *_s, FPU_REG *loaded_data);
+extern void reg_load_int16(short *_s, FPU_REG *loaded_data);
+extern void reg_load_bcd(char *s, FPU_REG *loaded_data);
+extern int reg_store_extended(long double *d, FPU_REG *st0_ptr);
+extern int reg_store_double(double *dfloat, FPU_REG *st0_ptr);
+extern int reg_store_single(float *single, FPU_REG *st0_ptr);
+extern int reg_store_int64(long long *d, FPU_REG *st0_ptr);
+extern int reg_store_int32(long *d, FPU_REG *st0_ptr);
+extern int reg_store_int16(short *d, FPU_REG *st0_ptr);
+extern int reg_store_bcd(char *d, FPU_REG *st0_ptr);
+extern int round_to_int(FPU_REG *r);
+extern char *fldenv(fpu_addr_modes addr_modes, char *address);
+extern void frstor(fpu_addr_modes addr_modes, char *address);
+extern unsigned short tag_word(void);
+extern char *fstenv(fpu_addr_modes addr_modes, char *address);
+extern void fsave(fpu_addr_modes addr_modes, char *address);
+
+/* reg_mul.c */
+extern int reg_mul(FPU_REG const *a, FPU_REG const *b,
+		   FPU_REG *dest, unsigned int control_w);
diff --git a/arch/i386/math-emu/fpu_system.h b/arch/i386/math-emu/fpu_system.h
new file mode 100644
index 000000000..d2c3fa716
--- /dev/null
+++ b/arch/i386/math-emu/fpu_system.h
@@ -0,0 +1,83 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_system.h                                                             |
+ |                                                                           |
+ | Copyright (C) 1992,1994                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _FPU_SYSTEM_H
+#define _FPU_SYSTEM_H
+
+/* system dependent definitions */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+/* This sets the pointer FPU_info to point to the argument part
+   of the stack frame of math_emulate() */
+#define SETUP_DATA_AREA(arg)    FPU_info = (struct info *) &arg
+
+/* Accessors for a segment descriptor taken from the current task's LDT.
+   A descriptor is read through two words, (x).a and (x).b.
+   NOTE(review): the bit positions used below match the high and low
+   32-bit words of an i386 segment descriptor with .b as the high word;
+   confirm against the declaration of the ldt entries. */
+
+/* LDT entry selected by selector s (the low three selector bits are
+   dropped to form the index). */
+#define LDT_DESCRIPTOR(s)       (current->ldt[(s) >> 3])
+#define SEG_D_SIZE(x)           ((x).b & (3 << 21))
+#define SEG_G_BIT(x)            ((x).b & (1 << 23))
+/* Units in which the limit is expressed: 4096 if bit 23 is set, else 1 */
+#define SEG_GRANULARITY(x)      (((x).b & (1 << 23)) ? 4096 : 1)
+/* Non-zero if any of the fields which a 286 descriptor lacks is in use */
+#define SEG_286_MODE(x)         ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23)))
+#define SEG_BASE_ADDR(s)        (((s).b & 0xff000000) \
+				 | (((s).b & 0xff) << 16) | ((s).a >> 16))
+/* 20 bit limit: bits 16..19 of .b and the low 16 bits of .a.  Bits 20..23
+   of .b are flag bits (cf. SEG_G_BIT, SEG_D_SIZE) and must not be taken
+   as part of the limit. */
+#define SEG_LIMIT(s)            (((s).b & 0xf0000) | ((s).a & 0xffff))
+/* The three tests below decode bits 9..11 of .b (segment type) */
+#define SEG_EXECUTE_ONLY(s)     (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11))
+#define SEG_WRITE_PERM(s)       (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9))
+#define SEG_EXPAND_DOWN(s)      (((s).b & ((1 << 11) | (1 << 10))) \
+				 == (1 << 10))
+
+/* Shorthands for the emulator state kept in the current task's tss. */
+#define I387			(current->tss.i387)
+#define FPU_info		(I387.soft.info)
+
+/* Saved user registers, reached through FPU_info.  The segment
+   registers are accessed as 16 bit quantities. */
+#define FPU_CS			(*(unsigned short *) &(FPU_info->___cs))
+#define FPU_SS			(*(unsigned short *) &(FPU_info->___ss))
+#define FPU_DS			(*(unsigned short *) &(FPU_info->___ds))
+#define FPU_EAX			(FPU_info->___eax)
+#define FPU_EFLAGS		(FPU_info->___eflags)
+#define FPU_EIP			(FPU_info->___eip)
+#define FPU_ORIG_EIP		(FPU_info->___orig_eip)
+
+#define FPU_lookahead           (I387.soft.lookahead)
+
+/* The next three items are stored in bytes 0, 1 and 2 of the soft.twd
+   field. */
+
+/* nz if ip_offset and cs_selector are not to be set for the current
+   instruction. */
+#define no_ip_update            (((char *)&(I387.soft.twd))[0])
+#define FPU_rm                  (((unsigned char *)&(I387.soft.twd))[1])
+
+/* Number of bytes of data which can be legally accessed by the current
+   instruction. This only needs to hold a number <= 108, so a byte will do. */
+#define access_limit            (((unsigned char *)&(I387.soft.twd))[2])
+
+/* Emulated FPU status word, control word, register stack and
+   top-of-stack index. */
+#define partial_status       	(I387.soft.swd)
+#define control_word		(I387.soft.cwd)
+#define regs			(I387.soft.regs)
+#define top			(I387.soft.top)
+
+/* The fip and foo fields viewed as struct address objects. */
+#define instruction_address     (*(struct address *)&I387.soft.fip)
+#define operand_address         (*(struct address *)&I387.soft.foo)
+
+#define FPU_verify_area(x,y,z)  if ( verify_area(x,y,z) ) \
+                                math_abort(FPU_info,SIGSEGV)
+
+#undef FPU_IGNORE_CODE_SEGV
+#ifdef FPU_IGNORE_CODE_SEGV
+/* verify_area() is very expensive, and causes the emulator to run
+   about 20% slower if applied to the code. Anyway, errors due to bad
+   code addresses should be much rarer than errors due to bad data
+   addresses. */
+#define	FPU_code_verify_area(z)
+#else
+/* A simpler test than verify_area() can probably be done for
+   FPU_code_verify_area() because the only possible error is to step
+   past the upper boundary of a legal code area. */
+#define	FPU_code_verify_area(z) FPU_verify_area(VERIFY_READ,(void *)FPU_EIP,z)
+#endif
+
+#endif
diff --git a/arch/i386/math-emu/fpu_trig.c b/arch/i386/math-emu/fpu_trig.c
new file mode 100644
index 000000000..05241f700
--- /dev/null
+++ b/arch/i386/math-emu/fpu_trig.c
@@ -0,0 +1,1718 @@
+/*---------------------------------------------------------------------------+
+ |  fpu_trig.c                                                               |
+ |                                                                           |
+ | Implementation of the FPU "transcendental" functions.                     |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "control_w.h"
+#include "reg_constant.h"	
+
+
+static void rem_kernel(unsigned long long st0, unsigned long long *y,
+		       unsigned long long st1,
+		       unsigned long long q, int n);
+
+#define BETTER_THAN_486
+
+#define FCOS  4
+/* Not needed now with new code
+#define FPTAN 1
+ */
+
+/* Used only by fptan, fsin, fcos, and fsincos. */
+/* This routine produces very accurate results, similar to
+   using a value of pi with more than 128 bits precision. */
+/* Limited measurements show no results worse than 64 bit precision
+   except for the results for arguments close to 2^63, where the
+   precision of the result sometimes degrades to about 63.9 bits */
+/* Reduce the argument X of a trig function, in place, using multiples
+   of CONST_PI2.
+
+   X     the argument; the callers in this file make it positive first.
+   even  0 (fsin, fptan) or FCOS (f_cos); it selects for which parity of
+	 the quotient q the reduced argument is replaced by
+	 CONST_PI2 - X.
+
+   Returns -1, with C2 set in partial_status, if X->exp >= EXP_BIAS + 63
+   (X is then left unchanged).  Otherwise returns (q & 3) | even, where q
+   is the integer quotient after any correction, and clears C2.  The
+   control word is restored on exit, and the status word is restored to
+   its value on entry apart from C2. */
+static int trig_arg(FPU_REG *X, int even)
+{
+  FPU_REG tmp;
+  unsigned long long q;
+  int old_cw = control_word, saved_status = partial_status;
+
+  if ( X->exp >= EXP_BIAS + 63 )
+    {
+      partial_status |= SW_C2;     /* Reduction incomplete. */
+      return -1;
+    }
+
+  /* Select chop rounding for the duration of the reduction. */
+  control_word &= ~CW_RC;
+  control_word |= RC_CHOP;
+
+  /* q = X / CONST_PI2, truncated to an integer */
+  reg_div(X, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f);
+  round_to_int(&tmp);  /* Fortunately, this can't overflow
+			  to 2^64 */
+  q = significand(&tmp);
+  if ( q )
+    {
+      /* Replace X by the remainder, computed by rem_kernel() from the
+	 significands of X and CONST_PI2 and the quotient q. */
+      rem_kernel(significand(X),
+		 &significand(&tmp),
+		 significand(&CONST_PI2),
+		 q, X->exp - CONST_PI2.exp);
+      tmp.exp = CONST_PI2.exp;
+      normalize(&tmp);
+      reg_move(&tmp, X);
+    }
+
+  /* FPTAN is not defined in this file (its #define is commented out
+     above), so the following section is not compiled. */
+#ifdef FPTAN
+  if ( even == FPTAN )
+    {
+      if ( ((X->exp >= EXP_BIAS) ||
+	    ((X->exp == EXP_BIAS-1)
+	     && (X->sigh >= 0xc90fdaa2))) ^ (q & 1) )
+	even = FCOS;
+      else
+	even = 0;
+    }
+#endif FPTAN
+
+  if ( (even && !(q & 1)) || (!even && (q & 1)) )
+    {
+      /* Use CONST_PI2 - X as the reduced argument. */
+      reg_sub(&CONST_PI2, X, X, FULL_PRECISION);
+#ifdef BETTER_THAN_486
+      /* So far, the results are exact but based upon a 64 bit
+	 precision approximation to pi/2. The technique used
+	 now is equivalent to using an approximation to pi/2 which
+	 is accurate to about 128 bits. */
+      if ( (X->exp <= CONST_PI2extra.exp + 64) || (q > 1) )
+	{
+	  /* This code gives the effect of having pi/2 to better than
+	     128 bits precision. */
+	  /* Correction term: (q + 1) * CONST_PI2extra */
+	  significand(&tmp) = q + 1;
+	  tmp.exp = EXP_BIAS + 63;
+	  tmp.tag = TW_Valid;
+	  normalize(&tmp);
+	  reg_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION);
+	  reg_add(X, &tmp,  X, FULL_PRECISION);
+	  if ( X->sign == SIGN_NEG )
+	    {
+	      /* CONST_PI2extra is negative, so the result of the addition
+		 can be negative. This means that the argument is actually
+		 in a different quadrant. The correction is always < pi/2,
+		 so it can't overflow into yet another quadrant. */
+	      X->sign = SIGN_POS;
+	      q++;
+	    }
+	}
+#endif BETTER_THAN_486
+    }
+#ifdef BETTER_THAN_486
+  else
+    {
+      /* So far, the results are exact but based upon a 64 bit
+	 precision approximation to pi/2. The technique used
+	 now is equivalent to using an approximation to pi/2 which
+	 is accurate to about 128 bits. */
+      if ( ((q > 0) && (X->exp <= CONST_PI2extra.exp + 64)) || (q > 1) )
+	{
+	  /* This code gives the effect of having pi/2 to better than
+	     128 bits precision. */
+	  /* Correction term: q * CONST_PI2extra */
+	  significand(&tmp) = q;
+	  tmp.exp = EXP_BIAS + 63;
+	  tmp.tag = TW_Valid;
+	  normalize(&tmp);
+	  reg_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION);
+	  reg_sub(X, &tmp, X, FULL_PRECISION);
+	  if ( (X->exp == CONST_PI2.exp) &&
+	      ((X->sigh > CONST_PI2.sigh)
+	       || ((X->sigh == CONST_PI2.sigh)
+		   && (X->sigl > CONST_PI2.sigl))) )
+	    {
+	      /* CONST_PI2extra is negative, so the result of the
+		 subtraction can be larger than pi/2. This means
+		 that the argument is actually in a different quadrant.
+		 The correction is always < pi/2, so it can't overflow
+		 into yet another quadrant. */
+	      reg_sub(&CONST_PI, X, X, FULL_PRECISION);
+	      q++;
+	    }
+	}
+    }
+#endif BETTER_THAN_486
+
+  /* Discard any status bits set by the arithmetic above. */
+  control_word = old_cw;
+  partial_status = saved_status & ~SW_C2;     /* Reduction complete. */
+
+  return (q & 3) | even;
+}
+
+
+/* Convert a long to register.
+
+   arg   points to the integer to convert.
+   dest  receives the result: CONST_Z for zero, otherwise a normalized
+	 TW_Valid number with the sign of *arg. */
+void convert_l2reg(long const *arg, FPU_REG *dest)
+{
+  long num = *arg;
+  unsigned long magnitude = num;
+
+  if (num == 0)
+    { reg_move(&CONST_Z, dest); return; }
+
+  if (num > 0)
+    dest->sign = SIGN_POS;
+  else
+    {
+      /* Negate as unsigned: -num would overflow (undefined behaviour)
+	 for the most negative long, unsigned negation is well defined
+	 and gives the right magnitude for that value too. */
+      magnitude = -magnitude;
+      dest->sign = SIGN_NEG;
+    }
+
+  /* Put the magnitude in the high significand word with the exponent
+     of a 32 bit integer, then let normalize() shift it into place. */
+  dest->sigh = magnitude;
+  dest->sigl = 0;
+  dest->exp = EXP_BIAS + 31;
+  dest->tag = TW_Valid;
+  normalize(dest);
+}
+
+
+/* Deal with a NaN or an empty st(0) on behalf of the one-operand
+   instructions.  A signaling NaN raises the invalid exception and, if
+   that exception is masked, is converted to a quiet NaN; a quiet NaN is
+   left as it is; an empty register is a stack underflow.  Any other tag
+   is an internal error (reported only when PARANOID is defined). */
+static void single_arg_error(FPU_REG *st0_ptr)
+{
+  char tag = st0_ptr->tag;
+
+  if ( tag == TW_Empty )
+    {
+      stack_underflow();  /* Puts a QNaN in st(0) */
+      return;
+    }
+
+  if ( tag == TW_NaN )
+    {
+      int signaling = !(st0_ptr->sigh & 0x40000000);
+
+      if ( signaling )
+	{
+	  EXCEPTION(EX_Invalid);
+	  if ( control_word & CW_Invalid )
+	    st0_ptr->sigh |= 0x40000000;	  /* Convert to a QNaN */
+	}
+      return;              /* return with a NaN in st(0) */
+    }
+
+#ifdef PARANOID
+  EXCEPTION(EX_INTERNAL|0x0112);
+#endif /* PARANOID */
+}
+
+
+/* Deal with a NaN in st(0) on behalf of the instructions which replace
+   st(0) and also push a second result.  A quiet NaN is duplicated onto
+   the stack.  A signaling NaN raises the invalid exception; if that is
+   masked the NaN is made quiet and duplicated, otherwise the stack is
+   left alone.  Any tag other than TW_NaN is an internal error (reported
+   only when PARANOID is defined). */
+static void single_arg_2_error(FPU_REG *st0_ptr)
+{
+  FPU_REG *st_new_ptr;
+
+  if ( st0_ptr->tag != TW_NaN )
+    {
+#ifdef PARANOID
+      EXCEPTION(EX_INTERNAL|0x0112);
+#endif /* PARANOID */
+      return;
+    }
+
+  if ( !(st0_ptr->sigh & 0x40000000) )   /* Signaling ? */
+    {
+      EXCEPTION(EX_Invalid);
+      if ( !(control_word & CW_Invalid) )
+	return;                           /* unmasked: nothing more to do */
+
+      /* The masked response */
+      /* Convert to a QNaN */
+      st0_ptr->sigh |= 0x40000000;
+    }
+
+  /* Push a copy of the quiet NaN; the NaN ends up in the new st(0) as
+     well as in st(1). */
+  st_new_ptr = &st(-1);
+  push();
+  reg_move(&st(1), st_new_ptr);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* f2xm1: replace st(0) by the result of poly_2xm1().  Zero is returned
+   unchanged, -infinity gives -1, +infinity is left alone, and NaN or
+   empty operands are passed to single_arg_error(). */
+static void f2xm1(FPU_REG *st0_ptr)
+{
+  char st0_tag;
+
+  clear_C1();
+  st0_tag = st0_ptr->tag;
+
+  if ( st0_tag == TW_Zero )
+    return;
+
+  if ( st0_tag == TW_Infinity )
+    {
+      if ( st0_ptr->sign == SIGN_NEG )
+	{
+	  /* -infinity gives -1 (p16-10) */
+	  reg_move(&CONST_1, st0_ptr);
+	  st0_ptr->sign = SIGN_NEG;
+	}
+      return;
+    }
+
+  if ( st0_tag != TW_Valid )
+    {
+      single_arg_error(st0_ptr);
+      return;
+    }
+
+  /* With an exponent >= 0 the operand is left untouched: for an 80486
+     FPU, the result is undefined. */
+  if ( st0_ptr->exp < 0 )
+    {
+#ifdef DENORM_OPERAND
+      if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	return;
+#endif /* DENORM_OPERAND */
+
+      /* poly_2xm1(x) requires 0 < x < 1. */
+      poly_2xm1(st0_ptr, st0_ptr);
+    }
+
+  if ( st0_ptr->exp <= EXP_UNDER )
+    {
+      /* A denormal result has been produced.
+	 Precision must have been lost, this is always
+	 an underflow. */
+      arith_underflow(st0_ptr);
+    }
+
+  set_precision_flag_up();   /* 80486 appears to always do this */
+}
+
+
+/* fptan: replace st(0) by the result of poly_tan() on the reduced
+   argument, then push 1.0 onto the stack.
+
+   The checks are made in this order: an empty st(0) (stack underflow),
+   then a full stack (stack overflow), then the operand's tag.
+
+   NOTE(review): on the TW_Valid and TW_Zero paths st_new_ptr is used
+   without a visible assignment in this function; presumably the
+   STACK_OVERFLOW macro sets it to &st(-1) -- confirm in fpu_emu.h. */
+static void fptan(FPU_REG *st0_ptr)
+{
+  char st0_tag = st0_ptr->tag;
+  FPU_REG *st_new_ptr;
+  int q;
+  char arg_sign = st0_ptr->sign;
+
+  /* Stack underflow has higher priority */
+  if ( st0_tag == TW_Empty )
+    {
+      stack_underflow();  /* Puts a QNaN in st(0) */
+      if ( control_word & CW_Invalid )
+	{
+	  st_new_ptr = &st(-1);
+	  push();
+	  stack_underflow();  /* Puts a QNaN in the new st(0) */
+	}
+      return;
+    }
+
+  if ( STACK_OVERFLOW )
+    { stack_overflow(); return; }
+
+  switch ( st0_tag )
+    {
+    case TW_Valid:
+      if ( st0_ptr->exp > EXP_BIAS - 40 )
+	{
+	  /* Work on the absolute value; the sign is put back below. */
+	  st0_ptr->sign = SIGN_POS;
+	  if ( (q = trig_arg(st0_ptr, 0)) != -1 )
+	    {
+	      poly_tan(st0_ptr, st0_ptr);
+	      /* The sign flips when the low bit of q is set. */
+	      st0_ptr->sign = (q & 1) ^ arg_sign;
+	    }
+	  else
+	    {
+	      /* Operand is out of range */
+	      st0_ptr->sign = arg_sign;         /* restore st(0) */
+	      return;
+	    }
+	  set_precision_flag_up();  /* We do not really know if up or down */
+	}
+      else
+	{
+	  /* For a small arg, the result == the argument */
+	  /* Underflow may happen */
+
+	  if ( st0_ptr->exp <= EXP_UNDER )
+	    {
+#ifdef DENORM_OPERAND
+	      if ( denormal_operand() )
+		return;
+#endif DENORM_OPERAND
+	      /* A denormal result has been produced.
+		 Precision must have been lost, this is always
+		 an underflow. */
+	      if ( arith_underflow(st0_ptr) )
+		return;
+	    }
+	  set_precision_flag_down();  /* Must be down. */
+	}
+      /* Push the constant 1.0 as the second result. */
+      push();
+      reg_move(&CONST_1, st_new_ptr);
+      return;
+      break;
+    case TW_Infinity:
+      /* The 80486 treats infinity as an invalid operand */
+      arith_invalid(st0_ptr);
+      if ( control_word & CW_Invalid )
+	{
+	  /* Masked: the pushed register gets the invalid response too. */
+	  st_new_ptr = &st(-1);
+	  push();
+	  arith_invalid(st_new_ptr);
+	}
+      return;
+    case TW_Zero:
+      /* st(0) keeps its zero; only 1.0 has to be pushed. */
+      push();
+      reg_move(&CONST_1, st_new_ptr);
+      setcc(0);
+      break;
+    default:
+      single_arg_2_error(st0_ptr);
+      break;
+    }
+}
+
+
+/* fxtract: split st(0) into exponent and significand.  For a valid
+   number the old st(0) register (st(1) after the push) receives the
+   unbiased exponent as a number, and the new st(0) receives the operand
+   with its exponent reset to EXP_BIAS.
+
+   A full stack is reported as a stack overflow before the operand is
+   examined.  Zero, infinity, NaN and empty operands are handled
+   separately below.
+
+   NOTE(review): st_new_ptr is used without a visible assignment in this
+   function; presumably the STACK_OVERFLOW macro sets it to &st(-1) --
+   confirm in fpu_emu.h. */
+static void fxtract(FPU_REG *st0_ptr)
+{
+  char st0_tag = st0_ptr->tag;
+  FPU_REG *st_new_ptr;
+  register FPU_REG *st1_ptr = st0_ptr;  /* anticipate */
+
+  if ( STACK_OVERFLOW )
+    {  stack_overflow(); return; }
+  clear_C1();
+  if ( !(st0_tag ^ TW_Valid) )
+    {
+      long e;
+
+#ifdef DENORM_OPERAND
+      if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	return;
+#endif /* DENORM_OPERAND */
+	  
+      push();
+      /* New st(0): the operand with its exponent reset to EXP_BIAS */
+      reg_move(st1_ptr, st_new_ptr);
+      st_new_ptr->exp = EXP_BIAS;
+      /* st(1): the unbiased exponent, converted to a number */
+      e = st1_ptr->exp - EXP_BIAS;
+      convert_l2reg(&e, st1_ptr);
+      return;
+    }
+  else if ( st0_tag == TW_Zero )
+    {
+      char sign = st0_ptr->sign;
+      if ( divide_by_zero(SIGN_NEG, st0_ptr) )
+	return;
+      /* Push a zero carrying the sign of the operand. */
+      push();
+      reg_move(&CONST_Z, st_new_ptr);
+      st_new_ptr->sign = sign;
+      return;
+    }
+  else if ( st0_tag == TW_Infinity )
+    {
+      /* The old register becomes +infinity; an infinity with the
+	 operand's sign is pushed. */
+      char sign = st0_ptr->sign;
+      st0_ptr->sign = SIGN_POS;
+      push();
+      reg_move(&CONST_INF, st_new_ptr);
+      st_new_ptr->sign = sign;
+      return;
+    }
+  else if ( st0_tag == TW_NaN )
+    {
+      if ( real_2op_NaN(st0_ptr, st0_ptr, st0_ptr) )
+	return;
+      /* Push a copy of the NaN. */
+      push();
+      reg_move(st1_ptr, st_new_ptr);
+      return;
+    }
+  else if ( st0_tag == TW_Empty )
+    {
+      /* Is this the correct behaviour? */
+      /* The control word is tested with the control word mask
+	 CW_Invalid, as everywhere else in this file. */
+      if ( control_word & CW_Invalid )
+	{
+	  stack_underflow();
+	  push();
+	  stack_underflow();
+	}
+      else
+	EXCEPTION(EX_StackUnder);
+    }
+#ifdef PARANOID
+  else
+    EXCEPTION(EX_INTERNAL | 0x119);
+#endif /* PARANOID */
+}
+
+
+/* fdecstp: clear C1 and decrement the stack top index.  The st0_ptr
+   argument is not used; st0_ptr will be fixed in math_emulate() before
+   the next instr. */
+static void fdecstp(FPU_REG *st0_ptr)
+{
+  clear_C1();
+  top -= 1;
+}
+
+/* fincstp: clear C1 and increment the stack top index.  The st0_ptr
+   argument is not used; st0_ptr will be fixed in math_emulate() before
+   the next instr. */
+static void fincstp(FPU_REG *st0_ptr)
+{
+  clear_C1();
+  top += 1;
+}
+
+
+/* fsqrt: replace st(0) by its square root.  Zero is returned unchanged,
+   +infinity is left alone, negative numbers and -infinity are invalid,
+   and NaN or empty operands are passed to single_arg_error(). */
+static void fsqrt_(FPU_REG *st0_ptr)
+{
+  char st0_tag = st0_ptr->tag;
+  int expon;
+
+  clear_C1();
+
+  if ( st0_tag == TW_Zero )
+    return;
+
+  if ( st0_tag == TW_Infinity )
+    {
+      if ( st0_ptr->sign == SIGN_NEG )
+	arith_invalid(st0_ptr);  /* sqrt(-Infinity) is invalid */
+      return;
+    }
+
+  if ( st0_tag != TW_Valid )
+    {
+      single_arg_error(st0_ptr);
+      return;
+    }
+
+  if ( st0_ptr->sign == SIGN_NEG )
+    {
+      arith_invalid(st0_ptr);  /* sqrt(negative) is invalid */
+      return;
+    }
+
+#ifdef DENORM_OPERAND
+  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+    return;
+#endif /* DENORM_OPERAND */
+
+  /* Split off the exponent: keep only its low bit so that the operand
+     lies in [1.0 .. 4.0), take the root, then add back half of the
+     original exponent. */
+  expon = st0_ptr->exp - EXP_BIAS;
+  st0_ptr->exp = EXP_BIAS + (expon & 1);
+
+  wm_sqrt(st0_ptr, control_word);	/* Do the computation */
+
+  st0_ptr->exp += expon >> 1;
+  st0_ptr->sign = SIGN_POS;
+}
+
+
+/* frndint: round st(0) to an integer using round_to_int().  Zero,
+   infinity and numbers with an exponent above EXP_BIAS+63 are returned
+   unchanged; NaN or empty operands are passed to single_arg_error(). */
+static void frndint_(FPU_REG *st0_ptr)
+{
+  char st0_tag = st0_ptr->tag;
+  int flags;
+
+  if ( (st0_tag == TW_Zero) || (st0_tag == TW_Infinity) )
+    return;
+
+  if ( st0_tag != TW_Valid )
+    {
+      single_arg_error(st0_ptr);
+      return;
+    }
+
+  if ( st0_ptr->exp > EXP_BIAS+63 )
+    return;
+
+#ifdef DENORM_OPERAND
+  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+    return;
+#endif /* DENORM_OPERAND */
+
+  /* Fortunately, this can't overflow to 2^64 */
+  flags = round_to_int(st0_ptr);
+  if ( flags )
+    set_precision_flag(flags);
+
+  /* Give the integer significand its exponent and renormalize. */
+  st0_ptr->exp = EXP_BIAS + 63;
+  normalize(st0_ptr);
+}
+
+
+/* fsin: replace st(0) by the result of poly_sine() on the reduced
+   argument.  Arguments with an exponent of EXP_BIAS - 40 or less are
+   returned unchanged (apart from exception flags), zero is returned
+   unchanged, infinity is an invalid operand, and NaN or empty operands
+   are passed to single_arg_error().  If trig_arg() cannot reduce the
+   argument, st(0) is left unchanged. */
+static void fsin(FPU_REG *st0_ptr)
+{
+  char st0_tag = st0_ptr->tag;
+  char arg_sign = st0_ptr->sign;
+
+  if ( st0_tag == TW_Valid )
+    {
+      FPU_REG rv;
+      int q;
+
+      if ( st0_ptr->exp > EXP_BIAS - 40 )
+	{
+	  /* Work on the absolute value; the sign is applied below. */
+	  st0_ptr->sign = SIGN_POS;
+	  if ( (q = trig_arg(st0_ptr, 0)) != -1 )
+	    {
+
+	      poly_sine(st0_ptr, &rv);
+
+	      /* Bit 1 of q flips the sign of the result, and so does a
+		 negative argument. */
+	      if (q & 2)
+		rv.sign ^= SIGN_POS ^ SIGN_NEG;
+	      rv.sign ^= arg_sign;
+	      reg_move(&rv, st0_ptr);
+
+	      /* We do not really know if up or down */
+	      set_precision_flag_up();
+	      return;
+	    }
+	  else
+	    {
+	      /* Operand is out of range */
+	      st0_ptr->sign = arg_sign;         /* restore st(0) */
+	      return;
+	    }
+	}
+      else
+	{
+	  /* For a small arg, the result == the argument */
+	  /* Underflow may happen */
+
+	  if ( st0_ptr->exp <= EXP_UNDER )
+	    {
+#ifdef DENORM_OPERAND
+	      if ( denormal_operand() )
+		return;
+#endif DENORM_OPERAND
+	      /* A denormal result has been produced.
+		 Precision must have been lost, this is always
+		 an underflow. */
+	      arith_underflow(st0_ptr);
+	      return;
+	    }
+
+	  set_precision_flag_up();  /* Must be up. */
+	}
+    }
+  else if ( st0_tag == TW_Zero )
+    {
+      setcc(0);
+      return;
+    }
+  else if ( st0_tag == TW_Infinity )
+    {
+      /* The 80486 treats infinity as an invalid operand */
+      arith_invalid(st0_ptr);
+      return;
+    }
+  else
+    single_arg_error(st0_ptr);
+}
+
+
+/* Compute the cosine of *arg in place; shared by fcos and fsincos.
+
+   Returns 0 if *arg now holds a result, and 1 if no result was produced
+   (argument out of range for trig_arg(), unmasked denormal operand,
+   infinity, NaN or empty register). */
+static int f_cos(FPU_REG *arg)
+{
+  char arg_sign = arg->sign;
+
+  if ( arg->tag == TW_Valid )
+    {
+      FPU_REG rv;
+      int q;
+
+      if ( arg->exp > EXP_BIAS - 40 )
+	{
+	  /* The sign of the argument is dropped. */
+	  arg->sign = SIGN_POS;
+	  /* NOTE(review): 0xc90fdaa22168c234 with exponent EXP_BIAS is
+	     presumably pi/2 -- confirm against CONST_PI2.  Arguments up
+	     to that value go straight to poly_cos() without argument
+	     reduction. */
+	  if ( (arg->exp < EXP_BIAS)
+	      || ((arg->exp == EXP_BIAS)
+		  && (significand(arg) <= 0xc90fdaa22168c234LL)) )
+	    {
+	      poly_cos(arg, &rv);
+	      reg_move(&rv, arg);
+
+	      /* We do not really know if up or down */
+	      set_precision_flag_down();
+	  
+	      return 0;
+	    }
+	  else if ( (q = trig_arg(arg, FCOS)) != -1 )
+	    {
+	      /* Larger arguments: reduce, then use poly_sine() on the
+		 reduced argument and pick the sign from q. */
+	      poly_sine(arg, &rv);
+
+	      if ((q+1) & 2)
+		rv.sign ^= SIGN_POS ^ SIGN_NEG;
+	      reg_move(&rv, arg);
+
+	      /* We do not really know if up or down */
+	      set_precision_flag_down();
+	  
+	      return 0;
+	    }
+	  else
+	    {
+	      /* Operand is out of range */
+	      arg->sign = arg_sign;         /* restore st(0) */
+	      return 1;
+	    }
+	}
+      else
+	{
+#ifdef DENORM_OPERAND
+	  if ( (arg->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return 1;
+#endif DENORM_OPERAND
+
+	  /* For a small argument the result is 1.0 */
+	  setcc(0);
+	  reg_move(&CONST_1, arg);
+#ifdef PECULIAR_486
+	  set_precision_flag_down();  /* 80486 appears to do this. */
+#else
+	  set_precision_flag_up();  /* Must be up. */
+#endif PECULIAR_486
+	  return 0;
+	}
+    }
+  else if ( arg->tag == TW_Zero )
+    {
+      reg_move(&CONST_1, arg);
+      setcc(0);
+      return 0;
+    }
+  else if ( arg->tag == TW_Infinity )
+    {
+      /* The 80486 treats infinity as an invalid operand */
+      arith_invalid(arg);
+      return 1;
+    }
+  else
+    {
+      single_arg_error(arg);  /* requires arg == &st(0) */
+      return 1;
+    }
+}
+
+
+/* FCOS entry point: replace st(0) with its cosine.  The status value
+   returned by f_cos() is not needed here and is discarded. */
+static void fcos(FPU_REG *st0_ptr)
+{
+  (void) f_cos(st0_ptr);
+}
+
+
+/* FSINCOS: the cosine is computed into a scratch register first; only
+   if that succeeds is st(0) replaced by its sine (via fsin()) and the
+   cosine pushed on top of it. */
+static void fsincos(FPU_REG *st0_ptr)
+{
+  char st0_tag = st0_ptr->tag;
+  FPU_REG *st_new_ptr;
+  FPU_REG cosine;
+
+  /* Stack underflow has higher priority */
+  if ( st0_tag == TW_Empty )
+    {
+      stack_underflow();  /* Puts a QNaN in st(0) */
+      if ( (control_word & CW_Invalid) != 0 )
+	{
+	  st_new_ptr = &st(-1);
+	  push();
+	  stack_underflow();  /* Puts a QNaN in the new st(0) */
+	}
+      return;
+    }
+
+  /* NOTE(review): st_new_ptr is used below without a visible assignment
+     on those paths; presumably STACK_OVERFLOW points it at the register
+     about to be pushed -- confirm against the macro definition. */
+  if ( STACK_OVERFLOW )
+    {
+      stack_overflow();
+      return;
+    }
+
+  if ( st0_tag == TW_NaN )
+    {
+      single_arg_2_error(st0_ptr);
+      return;
+    }
+
+  if ( st0_tag == TW_Infinity )
+    {
+      /* The 80486 treats infinity as an invalid operand */
+      if ( !arith_invalid(st0_ptr) )
+	{
+	  /* unmasked response */
+	  push();
+	  arith_invalid(st_new_ptr);
+	}
+      return;
+    }
+
+  /* Work on a copy so that st(0) is untouched if the cosine fails */
+  reg_move(st0_ptr, &cosine);
+  if ( f_cos(&cosine) != 0 )
+    {
+      return;
+    }
+
+  fsin(st0_ptr);
+  push();
+  reg_move(&cosine, st_new_ptr);
+}
+
+
+/*---------------------------------------------------------------------------*/
+/* The following all require two arguments: st(0) and st(1) */
+
+/* A lean, mean kernel for the fprem instructions. This relies upon
+   the division and rounding to an integer in do_fprem giving an
+   exact result. Because of this, rem_kernel() needs to deal only with
+   the least significant 64 bits, the more significant bits of the
+   result must be zero.
+
+   Stores  (st0 << n) - st1 * q,  evaluated modulo 2^64, in *y.
+
+     st0  significand of the dividend
+     y    receives the 64 bit result
+     st1  significand of the divisor
+     q    integer quotient
+     n    left shift applied to st0 before the subtraction; it must be
+          in the range 0..63, a larger shift count is undefined in C
+
+   Unsigned long long arithmetic wraps modulo 2^64, which is exactly
+   what the earlier hand written mull/subl/sbbl sequence computed: it
+   formed the low 64 bits of st1 * q from three 32x32 bit products and
+   subtracted them from the shifted st0.  Writing it in C avoids the
+   multi-line string literal and the write-only "=m" constraints on
+   operands which the assembly code both read and wrote.
+ */
+static void rem_kernel(unsigned long long st0, unsigned long long *y,
+		       unsigned long long st1,
+		       unsigned long long q, int n)
+{
+  /* Do the required multiplication and subtraction in the one operation */
+  *y = (st0 << n) - st1 * q;
+}
+
+
+/* Remainder of st(0) / st(1) */
+/* This routine produces exact results, i.e. there is never any
+   rounding or truncation, etc of the result. */
+/* Shared implementation behind fprem() and fprem1().  'round' selects
+   how the quotient is treated: fprem() passes RC_CHOP and fprem1()
+   passes RC_RND, which enables the extra "subtract st(1) once more"
+   step below.  The result is written to st(0).  The three low bits of
+   the quotient are reported through setcc() as C0 (bit 2), C3 (bit 1)
+   and C1 (bit 0); SW_C2 is reported instead when the exponent
+   difference is 64 or more. */
+static void do_fprem(FPU_REG *st0_ptr, int round)
+{
+  FPU_REG *st1_ptr = &st(1);
+  char st1_tag = st1_ptr->tag;
+  char st0_tag = st0_ptr->tag;
+  char sign = st0_ptr->sign;
+
+  /* True only when both operands are tagged TW_Valid */
+  if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) )
+    {
+      FPU_REG tmp;
+      int old_cw = control_word;
+      int expdif = st0_ptr->exp - st1_ptr->exp;
+      long long q;
+      unsigned short saved_status;
+      int cc = 0;
+
+#ifdef DENORM_OPERAND
+      if ( ((st0_ptr->exp <= EXP_UNDER) ||
+	    (st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) )
+	return;
+#endif DENORM_OPERAND
+      
+      /* We want the status following the denorm tests, but don't want
+	 the status changed by the arithmetic operations. */
+      saved_status = partial_status;
+      /* Force chop rounding while the quotient is formed; the caller's
+	 control word is put back from old_cw on every exit path below */
+      control_word &= ~CW_RC;
+      control_word |= RC_CHOP;
+
+      if (expdif < 64)
+	{
+	  /* This should be the most common case */
+
+	  if ( expdif > -2 )
+	    {
+	      reg_div(st0_ptr, st1_ptr, &tmp, PR_64_BITS | RC_CHOP | 0x3f);
+
+	      if ( tmp.exp >= EXP_BIAS )
+		{
+		  round_to_int(&tmp);  /* Fortunately, this can't overflow
+					  to 2^64 */
+		  q = significand(&tmp);
+
+		  /* tmp's significand is overwritten with the remainder */
+		  rem_kernel(significand(st0_ptr),
+			     &significand(&tmp),
+			     significand(st1_ptr),
+			     q, expdif);
+
+		  tmp.exp = st1_ptr->exp;
+		}
+	      else
+		{
+		  /* Quotient exponent below EXP_BIAS: st(0) itself is
+		     the remainder and the quotient is zero */
+		  reg_move(st0_ptr, &tmp);
+		  q = 0;
+		}
+	      tmp.sign = sign;
+
+	      /* Only when either of the top two significand bits of the
+		 remainder is set is the extra step considered */
+	      if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) )
+		{
+		  /* We may need to subtract st(1) once more,
+		     to get a result <= 1/2 of st(1). */
+		  unsigned long long x;
+		  expdif = st1_ptr->exp - tmp.exp;
+		  if ( expdif <= 1 )
+		    {
+		      if ( expdif == 0 )
+			x = significand(st1_ptr) - significand(&tmp);
+		      else /* expdif is 1 */
+			x = (significand(st1_ptr) << 1) - significand(&tmp);
+		      if ( (x < significand(&tmp)) ||
+			  /* or equi-distant (from 0 & st(1)) and q is odd */
+			  ((x == significand(&tmp)) && (q & 1) ) )
+			{
+			  /* Take the remainder on the other side of zero
+			     and bump the quotient to match */
+			  tmp.sign ^= (SIGN_POS^SIGN_NEG);
+			  significand(&tmp) = x;
+			  q++;
+			}
+		    }
+		}
+
+	      /* Report the three low quotient bits */
+	      if (q & 4) cc |= SW_C0;
+	      if (q & 2) cc |= SW_C3;
+	      if (q & 1) cc |= SW_C1;
+	    }
+	  else
+	    {
+	      /* expdif <= -2: st(0) is left as it is and the condition
+		 codes are cleared */
+	      control_word = old_cw;
+	      setcc(0);
+	      return;
+	    }
+	}
+      else
+	{
+	  /* There is a large exponent difference ( >= 64 ) */
+	  /* To make much sense, the code in this section should
+	     be done at high precision. */
+	  int exp_1;
+
+	  /* prevent overflow here */
+	  /* N is 'a number between 32 and 63' (p26-113) */
+	  reg_move(st0_ptr, &tmp);
+	  tmp.exp = EXP_BIAS + 56;
+	  /* st(1)'s exponent is replaced for the division only and is
+	     put back immediately afterwards */
+	  exp_1 = st1_ptr->exp;      st1_ptr->exp = EXP_BIAS;
+	  expdif -= 56;
+
+	  reg_div(&tmp, st1_ptr, &tmp, PR_64_BITS | RC_CHOP | 0x3f);
+	  st1_ptr->exp = exp_1;
+
+	  round_to_int(&tmp);  /* Fortunately, this can't overflow to 2^64 */
+
+	  rem_kernel(significand(st0_ptr),
+		     &significand(&tmp),
+		     significand(st1_ptr),
+		     significand(&tmp),
+		     tmp.exp - EXP_BIAS
+		     ); 
+	  tmp.exp = exp_1 + expdif;
+	  tmp.sign = sign;
+
+	  /* It is possible for the operation to be complete here.
+	     What does the IEEE standard say? The Intel 80486 manual
+	     implies that the operation will never be completed at this
+	     point, and the behaviour of a real 80486 confirms this.
+	   */
+	  if ( !(tmp.sigh | tmp.sigl) )
+	    {
+	      /* The result is zero */
+	      control_word = old_cw;
+	      partial_status = saved_status;
+	      reg_move(&CONST_Z, st0_ptr);
+	      st0_ptr->sign = sign;
+#ifdef PECULIAR_486
+	      setcc(SW_C2);
+#else
+	      setcc(0);
+#endif PECULIAR_486
+	      return;
+	    }
+	  cc = SW_C2;
+	}
+
+      /* Common exit for both reduction paths: restore the caller's
+	 control and status words, then store the normalized result */
+      control_word = old_cw;
+      partial_status = saved_status;
+      normalize_nuo(&tmp);
+      reg_move(&tmp, st0_ptr);
+      setcc(cc);
+
+      /* The only condition to be looked for is underflow,
+	 and it can occur here only if underflow is unmasked. */
+      if ( (st0_ptr->exp <= EXP_UNDER) && (st0_ptr->tag != TW_Zero)
+	  && !(control_word & CW_Underflow) )
+	arith_underflow(st0_ptr);
+
+      return;
+    }
+  else if ( (st0_tag == TW_Empty) | (st1_tag == TW_Empty) )
+    {
+      stack_underflow();
+      return;
+    }
+  else if ( st0_tag == TW_Zero )
+    {
+      if ( st1_tag == TW_Valid )
+	{
+#ifdef DENORM_OPERAND
+	  if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+
+	  /* fprem(Zero,Valid): st(0) stays zero */
+	  setcc(0); return;
+	}
+      else if ( st1_tag == TW_Zero )
+	{ arith_invalid(st0_ptr); return; } /* fprem(?,0) always invalid */
+      else if ( st1_tag == TW_Infinity )
+	{ setcc(0); return; }
+    }
+  else if ( st0_tag == TW_Valid )
+    {
+      if ( st1_tag == TW_Zero )
+	{
+	  arith_invalid(st0_ptr); /* fprem(Valid,Zero) is invalid */
+	  return;
+	}
+      else if ( st1_tag != TW_NaN )
+	{
+#ifdef DENORM_OPERAND
+	  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+
+	  if ( st1_tag == TW_Infinity )
+	    {
+	      /* fprem(Valid,Infinity) is o.k. */
+	      setcc(0); return;
+	    }
+	}
+    }
+  else if ( st0_tag == TW_Infinity )
+    {
+      if ( st1_tag != TW_NaN )
+	{
+	  arith_invalid(st0_ptr); /* fprem(Infinity,?) is invalid */
+	  return;
+	}
+    }
+
+  /* One of the registers must contain a NaN if we got here. */
+
+#ifdef PARANOID
+  if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) )
+      EXCEPTION(EX_INTERNAL | 0x118);
+#endif PARANOID
+
+  real_2op_NaN(st1_ptr, st0_ptr, st0_ptr);
+
+}
+
+
+/* ST(1) <- ST(1) * log ST;  pop ST */
+/* The result is written to st(1) and the stack is then popped, so it
+   ends up in the new st(0).  On most paths the pop is skipped when the
+   exception helper (arith_invalid(), divide_by_zero(), real_2op_NaN())
+   returns non-zero, or when denormal_operand() returns non-zero.
+   The logarithm itself is computed by poly_l2(); an exact power of two
+   in st(0) is handled separately so that the result can be precise. */
+static void fyl2x(FPU_REG *st0_ptr)
+{
+  char st0_tag = st0_ptr->tag;
+  FPU_REG *st1_ptr = &st(1), exponent;
+  char st1_tag = st1_ptr->tag;
+  int e;
+
+  clear_C1();
+  /* True only when both operands are tagged TW_Valid */
+  if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) )
+    {
+      if ( st0_ptr->sign == SIGN_POS )
+	{
+#ifdef DENORM_OPERAND
+	  if ( ((st0_ptr->exp <= EXP_UNDER) ||
+		(st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+
+	  /* Significand is exactly 0x80000000:00000000, i.e. only the
+	     top bit is set */
+	  if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) )
+	    {
+	      /* Special case. The result can be precise. */
+	      /* Build a register holding the unbiased exponent of st(0)
+		 as an integer and multiply st(1) by it */
+	      e = st0_ptr->exp - EXP_BIAS;
+	      if ( e > 0 )
+		{
+		  exponent.sigh = e;
+		  exponent.sign = SIGN_POS;
+		}
+	      else
+		{
+		  exponent.sigh = -e;
+		  exponent.sign = SIGN_NEG;
+		}
+	      exponent.sigl = 0;
+	      exponent.exp = EXP_BIAS + 31;
+	      exponent.tag = TW_Valid;
+	      normalize_nuo(&exponent);
+	      reg_mul(&exponent, st1_ptr, st1_ptr, FULL_PRECISION);
+	    }
+	  else
+	    {
+	      /* The usual case */
+	      poly_l2(st0_ptr, st1_ptr, st1_ptr);
+	      if ( st1_ptr->exp <= EXP_UNDER )
+		{
+		  /* A denormal result has been produced.
+		     Precision must have been lost, this is always
+		     an underflow. */
+		  arith_underflow(st1_ptr);
+		}
+	      else
+		set_precision_flag_up();  /* 80486 appears to always do this */
+	    }
+	  pop();
+	  return;
+	}
+      else
+	{
+	  /* negative */
+	  if ( !arith_invalid(st1_ptr) )
+	    pop();
+	  return;
+	}
+    }
+  else if ( (st0_tag == TW_Empty) || (st1_tag == TW_Empty) )
+    {
+      stack_underflow_pop(1);
+      return;
+    }
+  else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
+    {
+      if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
+	pop();
+      return;
+    }
+  else if ( (st0_tag <= TW_Zero) && (st1_tag <= TW_Zero) )
+    {
+      /* one of the args is zero, the other valid, or both zero */
+      if ( st0_tag == TW_Zero )
+	{
+	  if ( st1_tag == TW_Zero )
+	    {
+	      /* Both args zero is invalid */
+	      if ( !arith_invalid(st1_ptr) )
+		pop();
+	    }
+#ifdef PECULIAR_486
+	  /* This case is not specifically covered in the manual,
+	     but divide-by-zero would seem to be the best response.
+	     However, a real 80486 does it this way... */
+	  /* NOTE(review): st0_tag is TW_Zero on this path, so a test of
+	     st0_ptr->tag against TW_Infinity looks unreachable --
+	     confirm which operand was intended. */
+	  else if ( st0_ptr->tag == TW_Infinity )
+	    {
+	      reg_move(&CONST_INF, st1_ptr);
+	      pop();
+	    }
+#endif PECULIAR_486
+	  else
+	    {
+	      /* st(0) zero, st(1) valid: divide-by-zero, raised with the
+		 opposite of st(1)'s sign */
+	      if ( !divide_by_zero(st1_ptr->sign^SIGN_NEG^SIGN_POS, st1_ptr) )
+		pop();
+	    }
+	  return;
+	}
+      else
+	{
+	  /* st(1) contains zero, st(0) valid <> 0 */
+	  /* Zero is the valid answer */
+	  char sign = st1_ptr->sign;
+
+	  if ( st0_ptr->sign == SIGN_NEG )
+	    {
+	      /* log(negative) */
+	      if ( !arith_invalid(st1_ptr) )
+		pop();
+	      return;
+	    }
+
+#ifdef DENORM_OPERAND
+	  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+
+	  /* The sign of the zero result is flipped when st(0)'s
+	     exponent is below EXP_BIAS */
+	  if ( st0_ptr->exp < EXP_BIAS ) sign ^= SIGN_NEG^SIGN_POS;
+	  /* After the pop the old st(1) is the new st(0) */
+	  pop(); st0_ptr = &st(0);
+	  reg_move(&CONST_Z, st0_ptr);
+	  st0_ptr->sign = sign;
+	  return;
+	}
+    }
+  /* One or both arg must be an infinity */
+  else if ( st0_tag == TW_Infinity )
+    {
+      if ( (st0_ptr->sign == SIGN_NEG) || (st1_tag == TW_Zero) )
+	{
+	  /* log(-infinity) or 0*log(infinity) */
+	  if ( !arith_invalid(st1_ptr) )
+	    pop();
+	  return;
+	}
+      else
+	{
+	  char sign = st1_ptr->sign;
+
+#ifdef DENORM_OPERAND
+	  if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+
+	  /* Result is an infinity carrying st(1)'s sign */
+	  pop(); st0_ptr = &st(0);
+	  reg_move(&CONST_INF, st0_ptr);
+	  st0_ptr->sign = sign;
+	  return;
+	}
+    }
+  /* st(1) must be infinity here */
+  else if ( (st0_tag == TW_Valid) && (st0_ptr->sign == SIGN_POS) )
+    {
+      if ( st0_ptr->exp >= EXP_BIAS )
+	{
+	  if ( (st0_ptr->exp == EXP_BIAS) &&
+	      (st0_ptr->sigh == 0x80000000) &&
+	      (st0_ptr->sigl == 0) )
+	    {
+	      /* st(0) holds 1.0 */
+	      /* infinity*log(1) */
+	      if ( !arith_invalid(st1_ptr) )
+		pop();
+	      return;
+	    }
+	  /* st(0) is positive and > 1.0 */
+	  /* The infinity in st(1) is the result as it stands */
+	  pop();
+	}
+      else
+	{
+	  /* st(0) is positive and < 1.0 */
+
+#ifdef DENORM_OPERAND
+	  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+
+	  /* The infinity in st(1) changes sign */
+	  st1_ptr->sign ^= SIGN_NEG;
+	  pop();
+	}
+      return;
+    }
+  else
+    {
+      /* st(0) must be zero or negative */
+      if ( st0_ptr->tag == TW_Zero )
+	{
+	  /* This should be invalid, but a real 80486 is happy with it. */
+	  /* With PECULIAR_486 defined the braces below run
+	     unconditionally; otherwise only when divide_by_zero()
+	     returns zero */
+#ifndef PECULIAR_486
+	  if ( !divide_by_zero(st1_ptr->sign, st1_ptr) )
+#endif PECULIAR_486
+	    {
+	      st1_ptr->sign ^= SIGN_NEG^SIGN_POS;
+	      pop();
+	    }
+	}
+      else
+	{
+	  /* log(negative) */
+	  if ( !arith_invalid(st1_ptr) )
+	    pop();
+	}
+      return;
+    }
+}
+
+
+/* FPATAN: two-operand arctangent of st(0) and st(1).  The result is
+   written to st(1) and the stack is popped, so it ends up in the new
+   st(0).  The arithmetic for two valid operands is done by poly_atan();
+   the remaining branches supply fixed results (zero, pi/4, pi/2, pi,
+   or pi/4 + pi/2) for the zero and infinity operand combinations. */
+static void fpatan(FPU_REG *st0_ptr)
+{
+  char st0_tag = st0_ptr->tag;
+  FPU_REG *st1_ptr = &st(1);
+  char st1_tag = st1_ptr->tag;
+
+  clear_C1();
+  /* True only when both operands are tagged TW_Valid */
+  if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) )
+    {
+#ifdef DENORM_OPERAND
+      if ( ((st0_ptr->exp <= EXP_UNDER) ||
+	    (st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) )
+	return;
+#endif DENORM_OPERAND
+
+      poly_atan(st0_ptr, st1_ptr, st1_ptr);
+
+      if ( st1_ptr->exp <= EXP_UNDER )
+	{
+	  /* A denormal result has been produced.
+	     Precision must have been lost.
+	     This is by definition an underflow. */
+	  arith_underflow(st1_ptr);
+	  pop();
+	  return;
+	}
+      /* Otherwise fall through to the common pop() at the end */
+    }
+  else if ( (st0_tag == TW_Empty) || (st1_tag == TW_Empty) )
+    {
+      stack_underflow_pop(1);
+      return;
+    }
+  else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) )
+    {
+      if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
+	  pop();
+      return;
+    }
+  else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) )
+    {
+      /* The result always takes the sign st(1) had on entry */
+      char sign = st1_ptr->sign;
+      if ( st0_tag == TW_Infinity )
+	{
+	  if ( st1_tag == TW_Infinity )
+	    {
+	      /* Both infinite: pi/4 for +infinity in st(0), otherwise
+		 pi/4 + pi/2 */
+	      if ( st0_ptr->sign == SIGN_POS )
+		{ reg_move(&CONST_PI4, st1_ptr); }
+	      else
+		reg_add(&CONST_PI4, &CONST_PI2, st1_ptr, FULL_PRECISION);
+	    }
+	  else
+	    {
+#ifdef DENORM_OPERAND
+	      if ( st1_tag != TW_Zero )
+		{
+		  if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+		    return;
+		}
+#endif DENORM_OPERAND
+
+	      if ( st0_ptr->sign == SIGN_POS )
+		{
+		  /* st(0) is +infinity, st(1) finite: result is zero.
+		     This path returns without setting the precision
+		     flag. */
+		  reg_move(&CONST_Z, st1_ptr);
+		  st1_ptr->sign = sign;   /* An 80486 preserves the sign */
+		  pop();
+		  return;
+		}
+	      else
+		reg_move(&CONST_PI, st1_ptr);
+	    }
+	}
+      else
+	{
+	  /* st(1) is infinity, st(0) not infinity */
+#ifdef DENORM_OPERAND
+	  if ( st0_tag != TW_Zero )
+	    {
+	      if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+		return;
+	    }
+#endif DENORM_OPERAND
+
+	  reg_move(&CONST_PI2, st1_ptr);
+	}
+      st1_ptr->sign = sign;
+    }
+  else if ( st1_tag == TW_Zero )
+    {
+      /* st(0) must be valid or zero */
+      char sign = st1_ptr->sign;
+
+#ifdef DENORM_OPERAND
+      if ( st0_tag != TW_Zero )
+	{
+	  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+	}
+#endif DENORM_OPERAND
+
+      /* Positive st(0): the zero already in st(1) is the result */
+      if ( st0_ptr->sign == SIGN_POS )
+	{ /* An 80486 preserves the sign */ pop(); return; }
+      else
+	reg_move(&CONST_PI, st1_ptr);
+      st1_ptr->sign = sign;
+    }
+  else if ( st0_tag == TW_Zero )
+    {
+      /* st(1) must be TW_Valid here */
+      char sign = st1_ptr->sign;
+
+#ifdef DENORM_OPERAND
+      if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	return;
+#endif DENORM_OPERAND
+
+      reg_move(&CONST_PI2, st1_ptr);
+      st1_ptr->sign = sign;
+    }
+#ifdef PARANOID
+  else
+    EXCEPTION(EX_INTERNAL | 0x125);
+#endif PARANOID
+
+  /* Common exit for every branch that did not return early */
+  pop();
+  set_precision_flag_up();  /* We do not really know if up or down */
+}
+
+
+/* FPREM entry point: remainder of st(0) / st(1), with do_fprem() told
+   to use RC_CHOP for the quotient. */
+static void fprem(FPU_REG *st0_ptr)
+{
+  const int quotient_mode = RC_CHOP;
+
+  do_fprem(st0_ptr, quotient_mode);
+}
+
+
+/* FPREM1 entry point: remainder of st(0) / st(1), with do_fprem() told
+   to use RC_RND for the quotient. */
+static void fprem1(FPU_REG *st0_ptr)
+{
+  const int quotient_mode = RC_RND;
+
+  do_fprem(st0_ptr, quotient_mode);
+}
+
+
+/* FYL2XP1 (presumably ST(1) <- ST(1) * log2(ST + 1); pop ST -- the
+   arithmetic itself lives in poly_l2p1()).  The result is written to
+   st(1) and the stack is popped.  On most paths the pop is skipped when
+   an exception helper (arith_invalid(), real_2op_NaN()) or
+   denormal_operand() returns non-zero.  Several branches differ
+   depending on whether PECULIAR_486 is defined. */
+static void fyl2xp1(FPU_REG *st0_ptr)
+{
+  char st0_tag = st0_ptr->tag, sign;
+  FPU_REG *st1_ptr = &st(1);
+  char st1_tag = st1_ptr->tag;
+
+  clear_C1();
+  /* True only when both operands are tagged TW_Valid */
+  if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) )
+    {
+#ifdef DENORM_OPERAND
+      if ( ((st0_ptr->exp <= EXP_UNDER) ||
+	    (st1_ptr->exp <= EXP_UNDER)) && denormal_operand() )
+	return;
+#endif DENORM_OPERAND
+
+      /* A non-zero return from poly_l2p1() means the operand was
+	 rejected as invalid */
+      if ( poly_l2p1(st0_ptr, st1_ptr, st1_ptr) )
+	{
+#ifdef PECULIAR_486   /* Stupid 80486 doesn't worry about log(negative). */
+	  st1_ptr->sign ^= SIGN_POS^SIGN_NEG;
+#else
+	  if ( arith_invalid(st1_ptr) )  /* poly_l2p1() returned invalid */
+	    return;
+#endif PECULIAR_486
+	}
+      if ( st1_ptr->exp <= EXP_UNDER )
+	{
+	  /* A denormal result has been produced.
+	     Precision must have been lost, this is always
+	     an underflow. */
+	  /* The sign is saved and put back around arith_underflow() */
+	  sign = st1_ptr->sign;
+	  arith_underflow(st1_ptr);
+	  st1_ptr->sign = sign;
+	}
+      else
+	set_precision_flag_up();   /* 80486 appears to always do this */
+      pop();
+      return;
+    }
+  else if ( (st0_tag == TW_Empty) | (st1_tag == TW_Empty) )
+    {
+      stack_underflow_pop(1);
+      return;
+    }
+  else if ( st0_tag == TW_Zero )
+    {
+      if ( st1_tag <= TW_Zero )
+	{
+#ifdef DENORM_OPERAND
+	  if ( (st1_tag == TW_Valid) && (st1_ptr->exp <= EXP_UNDER) &&
+	      (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+	  
+	  /* Result is the zero from st(0) with the two signs combined */
+	  st0_ptr->sign ^= st1_ptr->sign;
+	  reg_move(st0_ptr, st1_ptr);
+	}
+      else if ( st1_tag == TW_Infinity )
+	{
+	  /* Infinity*log(1) */
+	  if ( !arith_invalid(st1_ptr) )
+	    pop();
+	  return;
+	}
+      else if ( st1_tag == TW_NaN )
+	{
+	  if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
+	    pop();
+	  return;
+	}
+#ifdef PARANOID
+      else
+	{
+	  EXCEPTION(EX_INTERNAL | 0x116);
+	  return;
+	}
+#endif PARANOID
+      pop(); return;
+    }
+  else if ( st0_tag == TW_Valid )
+    {
+      /* st(1) cannot be Valid or Empty here; those were handled above */
+      if ( st1_tag == TW_Zero )
+	{
+	  if ( st0_ptr->sign == SIGN_NEG )
+	    {
+	      if ( st0_ptr->exp >= EXP_BIAS )
+		{
+		  /* st(0) holds <= -1.0 */
+#ifdef PECULIAR_486   /* Stupid 80486 doesn't worry about log(negative). */
+		  st1_ptr->sign ^= SIGN_POS^SIGN_NEG;
+#else
+		  if ( arith_invalid(st1_ptr) ) return;
+#endif PECULIAR_486
+		  pop(); return;
+		}
+#ifdef DENORM_OPERAND
+	      if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+		return;
+#endif DENORM_OPERAND
+	      /* Negative st(0) with exponent below EXP_BIAS: the zero in
+		 st(1) changes sign */
+	      st1_ptr->sign ^= SIGN_POS^SIGN_NEG;
+	      pop(); return;
+	    }
+#ifdef DENORM_OPERAND
+	  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+	  /* Positive st(0): the zero in st(1) is the result as it stands */
+	  pop(); return;
+	}
+      if ( st1_tag == TW_Infinity )
+	{
+	  if ( st0_ptr->sign == SIGN_NEG )
+	    {
+	      if ( (st0_ptr->exp >= EXP_BIAS) &&
+		  !((st0_ptr->sigh == 0x80000000) &&
+		    (st0_ptr->sigl == 0)) )
+		{
+		  /* st(0) holds < -1.0 */
+#ifdef PECULIAR_486   /* Stupid 80486 doesn't worry about log(negative). */
+		  st1_ptr->sign ^= SIGN_POS^SIGN_NEG;
+#else
+		  if ( arith_invalid(st1_ptr) ) return;
+#endif PECULIAR_486
+		  pop(); return;
+		}
+#ifdef DENORM_OPERAND
+	      if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+		return;
+#endif DENORM_OPERAND
+	      /* Remaining negative st(0): the infinity in st(1) changes
+		 sign */
+	      st1_ptr->sign ^= SIGN_POS^SIGN_NEG;
+	      pop(); return;
+	    }
+#ifdef DENORM_OPERAND
+	  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+	  /* Positive st(0): the infinity in st(1) is the result */
+	  pop(); return;
+	}
+      if ( st1_tag == TW_NaN )
+	{
+	  if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
+	    pop();
+	  return;
+	}
+    }
+  else if ( st0_tag == TW_NaN )
+    {
+      if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
+	pop();
+      return;
+    }
+  else if ( st0_tag == TW_Infinity )
+    {
+      if ( st1_tag == TW_NaN )
+	{
+	  if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) )
+	    pop();
+	  return;
+	}
+      else if ( st0_ptr->sign == SIGN_NEG )
+	{
+	  /* st(1)'s exponent is read before arith_invalid() can
+	     overwrite the register */
+	  int exponent = st1_ptr->exp;
+	  /* The two PECULIAR_486 sections below only change the order
+	     of the invalid-operation and denormal-operand checks */
+#ifndef PECULIAR_486
+	  /* This should have higher priority than denormals, but... */
+	  if ( arith_invalid(st1_ptr) )  /* log(-infinity) */
+	    return;
+#endif PECULIAR_486
+#ifdef DENORM_OPERAND
+	  if ( st1_tag != TW_Zero )
+	    {
+	      if ( (exponent <= EXP_UNDER) && (denormal_operand()) )
+		return;
+	    }
+#endif DENORM_OPERAND
+#ifdef PECULIAR_486
+	  /* Denormal operands actually get higher priority */
+	  if ( arith_invalid(st1_ptr) )  /* log(-infinity) */
+	    return;
+#endif PECULIAR_486
+	  pop();
+	  return;
+	}
+      else if ( st1_tag == TW_Zero )
+	{
+	  /* log(infinity) */
+	  if ( !arith_invalid(st1_ptr) )
+	    pop();
+	  return;
+	}
+	
+      /* st(1) must be valid here. */
+
+#ifdef DENORM_OPERAND
+      if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	return;
+#endif DENORM_OPERAND
+
+      /* The Manual says that log(Infinity) is invalid, but a real
+	 80486 sensibly says that it is o.k. */
+      /* Result is an infinity carrying st(1)'s sign */
+      { char sign = st1_ptr->sign;
+	reg_move(&CONST_INF, st1_ptr);
+	st1_ptr->sign = sign;
+      }
+      pop();
+      return;
+    }
+#ifdef PARANOID
+  else
+    {
+      EXCEPTION(EX_INTERNAL | 0x117);
+    }
+#endif PARANOID
+}
+
+
+/* FSCALE: scale st(0) by a power of two taken from st(1).  For two
+   valid operands st(1) is chopped to an integer and added to the
+   exponent of st(0); round_reg() then deals with any resulting overflow
+   or underflow.  The stack is not popped.  The remaining branches cover
+   the zero, infinity and NaN operand combinations, and stack underflow
+   is reported when either register is empty. */
+static void fscale(FPU_REG *st0_ptr)
+{
+  char st0_tag = st0_ptr->tag;
+  FPU_REG *st1_ptr = &st(1);
+  char st1_tag = st1_ptr->tag;
+  int old_cw = control_word;
+  char sign = st0_ptr->sign;
+
+  clear_C1();
+  /* True only when both operands are tagged TW_Valid */
+  if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) )
+    {
+      long scale;
+      FPU_REG tmp;
+
+#ifdef DENORM_OPERAND
+      if ( ((st0_ptr->exp <= EXP_UNDER) ||
+	    (st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) )
+	return;
+#endif DENORM_OPERAND
+
+      if ( st1_ptr->exp > EXP_BIAS + 30 )
+	{
+	  /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */
+	  /* NOTE(review): this 'sign' shadows the function-level 'sign'
+	     declared above; both are loaded from st0_ptr->sign. */
+	  char sign;
+
+	  if ( st1_ptr->sign == SIGN_POS )
+	    {
+	      /* Huge positive scale: signed infinity */
+	      EXCEPTION(EX_Overflow);
+	      sign = st0_ptr->sign;
+	      reg_move(&CONST_INF, st0_ptr);
+	      st0_ptr->sign = sign;
+	    }
+	  else
+	    {
+	      /* Huge negative scale: signed zero */
+	      EXCEPTION(EX_Underflow);
+	      sign = st0_ptr->sign;
+	      reg_move(&CONST_Z, st0_ptr);
+	      st0_ptr->sign = sign;
+	    }
+	  return;
+	}
+
+      /* Chop a copy of st(1) to an integer, with the rounding mode
+	 forced to RC_CHOP just for this conversion */
+      control_word &= ~CW_RC;
+      control_word |= RC_CHOP;
+      reg_move(st1_ptr, &tmp);
+      round_to_int(&tmp);               /* This can never overflow here */
+      control_word = old_cw;
+      /* Apply st(1)'s sign to the integer magnitude */
+      scale = st1_ptr->sign ? -tmp.sigl : tmp.sigl;
+      scale += st0_ptr->exp;
+      st0_ptr->exp = scale;
+
+      /* Use round_reg() to properly detect under/overflow etc */
+      round_reg(st0_ptr, 0, control_word);
+
+      return;
+    }
+  else if ( st0_tag == TW_Valid )
+    {
+      if ( st1_tag == TW_Zero )
+	{
+
+#ifdef DENORM_OPERAND
+	  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+
+	  /* Scaling by zero: st(0) is unchanged */
+	  return;
+	}
+      if ( st1_tag == TW_Infinity )
+	{
+#ifdef DENORM_OPERAND
+	  if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+
+	  /* +infinity scale gives infinity, -infinity gives zero; the
+	     result keeps the sign st(0) had on entry */
+	  if ( st1_ptr->sign == SIGN_POS )
+	    { reg_move(&CONST_INF, st0_ptr); }
+	  else
+	      reg_move(&CONST_Z, st0_ptr);
+	  st0_ptr->sign = sign;
+	  return;
+	}
+      if ( st1_tag == TW_NaN )
+	{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; }
+    }
+  else if ( st0_tag == TW_Zero )
+    {
+      if ( st1_tag == TW_Valid )
+	{
+
+#ifdef DENORM_OPERAND
+	  if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+
+	  /* Zero scaled by a finite value: st(0) is unchanged */
+	  return;
+	}
+      else if ( st1_tag == TW_Zero ) { return; }
+      else if ( st1_tag == TW_Infinity )
+	{
+	  if ( st1_ptr->sign == SIGN_NEG )
+	    return;
+	  else
+	    {
+	      arith_invalid(st0_ptr); /* Zero scaled by +Infinity */
+	      return;
+	    }
+	}
+      else if ( st1_tag == TW_NaN )
+	{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; }
+    }
+  else if ( st0_tag == TW_Infinity )
+    {
+      if ( st1_tag == TW_Valid )
+	{
+
+#ifdef DENORM_OPERAND
+	  if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) )
+	    return;
+#endif DENORM_OPERAND
+
+	  /* Infinity scaled by a finite value: st(0) is unchanged */
+	  return;
+	}
+      if ( ((st1_tag == TW_Infinity) && (st1_ptr->sign == SIGN_POS))
+	  || (st1_tag == TW_Zero) )
+	return;
+      else if ( st1_tag == TW_Infinity )
+	{
+	  arith_invalid(st0_ptr); /* Infinity scaled by -Infinity */
+	  return;
+	}
+      else if ( st1_tag == TW_NaN )
+	{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; }
+    }
+  else if ( st0_tag == TW_NaN )
+    {
+      if ( st1_tag != TW_Empty )
+	{ real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; }
+    }
+
+  /* Every non-empty operand combination has returned by now, so
+     anything else reaching this point is an internal error */
+#ifdef PARANOID
+  if ( !((st0_tag == TW_Empty) || (st1_tag == TW_Empty)) )
+    {
+      EXCEPTION(EX_INTERNAL | 0x115);
+      return;
+    }
+#endif
+
+  /* At least one of st(0), st(1) must be empty */
+  stack_underflow();
+
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* First dispatch table: eight handlers, selected by FPU_rm. */
+static FUNC_ST0 const trig_table_a[] =
+  {
+    f2xm1,   fyl2x,  fptan,   fpatan,
+    fxtract, fprem1, fdecstp, fincstp
+  };
+
+/* Run the trig_table_a handler selected by FPU_rm, handing it a
+   pointer to st(0). */
+void trig_a(void)
+{
+  FUNC_ST0 handler = trig_table_a[FPU_rm];
+
+  handler(&st(0));
+}
+
+
+/* Second dispatch table: eight handlers, selected by FPU_rm. */
+static FUNC_ST0 const trig_table_b[] = {
+  fprem,    fyl2xp1, fsqrt_, fsincos,
+  frndint_, fscale,  fsin,   fcos
+};
+
+/* Run the trig_table_b handler selected by FPU_rm, handing it a
+   pointer to st(0). */
+void trig_b(void)
+{
+  FUNC_ST0 handler = trig_table_b[FPU_rm];
+
+  handler(&st(0));
+}
diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c
new file mode 100644
index 000000000..6f3270ae3
--- /dev/null
+++ b/arch/i386/math-emu/get_address.c
@@ -0,0 +1,423 @@
+/*---------------------------------------------------------------------------+
+ |  get_address.c                                                            |
+ |                                                                           |
+ | Get the effective address from an FPU instruction.                        |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Note:                                                                     |
+ |    The file contains code which accesses user memory.                     |
+ |    Emulator static data may change when user memory is accessed, due to   |
+ |    other processes using the emulator while swapping is in progress.      |
+ +---------------------------------------------------------------------------*/
+
+
+#include <linux/stddef.h>
+#include <linux/head.h>
+
+#include <asm/segment.h>
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+
+
+#define FPU_WRITE_BIT 0x10
+
+/* Byte offsets, within struct info, of the saved general registers.
+   The order is eax, ecx, edx, ebx, esp, ebp, esi, edi; the table is
+   indexed with the 3 bit register numbers taken from the instruction
+   bytes (r/m, SIB base, SIB index). */
+static int reg_offset[] =
+  {
+    offsetof(struct info,___eax),	/* 0 */
+    offsetof(struct info,___ecx),	/* 1 */
+    offsetof(struct info,___edx),	/* 2 */
+    offsetof(struct info,___ebx),	/* 3 */
+    offsetof(struct info,___esp),	/* 4 */
+    offsetof(struct info,___ebp),	/* 5 */
+    offsetof(struct info,___esi),	/* 6 */
+    offsetof(struct info,___edi)	/* 7 */
+  };
+
+/* The saved copy of general register number x, as an lvalue of type long */
+#define REG_(x) (*(long *)((char *) FPU_info + reg_offset[(x)]))
+
+/* Byte offsets, within struct info, of the saved segment registers as
+   used in vm86 mode.  The order is cs, ds, es, fs, gs, ss, followed by
+   a second ds entry (presumably the slot for the default segment --
+   confirm against the PREFIX_ codes). */
+static int reg_offset_vm86[] =
+  {
+    offsetof(struct info,___cs),	/* 0 */
+    offsetof(struct info,___vm86_ds),	/* 1 */
+    offsetof(struct info,___vm86_es),	/* 2 */
+    offsetof(struct info,___vm86_fs),	/* 3 */
+    offsetof(struct info,___vm86_gs),	/* 4 */
+    offsetof(struct info,___ss),	/* 5 */
+    offsetof(struct info,___vm86_ds)	/* 6 */
+  };
+
+/* The saved vm86 segment register for table index x, as an lvalue of
+   type unsigned short */
+#define VM86_REG_(x) (*(unsigned short *) \
+		      ((char *) FPU_info + reg_offset_vm86[((unsigned)x)]))
+
+/* Byte offsets, within struct info, of the saved segment registers as
+   used in protected mode.  The order is cs, ds, es, fs, gs, ss,
+   followed by a second ds entry (presumably the slot for the default
+   segment -- confirm against the PREFIX_ codes). */
+static int reg_offset_pm[] =
+  {
+    offsetof(struct info,___cs),	/* 0 */
+    offsetof(struct info,___ds),	/* 1 */
+    offsetof(struct info,___es),	/* 2 */
+    offsetof(struct info,___fs),	/* 3 */
+    offsetof(struct info,___gs),	/* 4 */
+    offsetof(struct info,___ss),	/* 5 */
+    offsetof(struct info,___ds)		/* 6 */
+  };
+
+/* The saved protected mode segment register for table index x, as an
+   lvalue of type unsigned short */
+#define PM_REG_(x) (*(unsigned short *) \
+		      ((char *) FPU_info + reg_offset_pm[((unsigned)x)]))
+
+
+/* Decode the SIB byte and any displacement which follows it.
+   'mod' is the MOD field of the ModR/M byte; the values 0, 1 and 2 are
+   handled here (the caller does not use this function for mod == 3).
+   The SIB byte and the displacement are fetched from user code at
+   *fpu_eip, which is advanced past every byte consumed.
+   Returns base + (index << scale) + displacement. */
+static int sib(int mod, unsigned long *fpu_eip)
+{
+  unsigned char sib_byte, scale, index_reg, base_reg;
+  long ea;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_code_verify_area(1);
+  sib_byte = get_fs_byte((char *) (*fpu_eip));   /* The SIB byte */
+  RE_ENTRANT_CHECK_ON;
+  (*fpu_eip)++;
+
+  /* Split the byte into its SS (7..6), INDEX (5..3), BASE (2..0) fields */
+  scale = sib_byte >> 6;
+  index_reg = (sib_byte >> 3) & 7;
+  base_reg = sib_byte & 7;
+
+  if ((mod == 0) && (base_reg == 5))
+    {
+      /* No base register */
+      ea = 0;
+    }
+  else
+    {
+      ea = REG_(base_reg);
+    }
+
+  if (index_reg != 4)
+    {
+      ea += (REG_(index_reg)) << scale;
+    }
+  else
+    {
+      /* No index register */
+      /* A non-zero ss is illegal */
+      if ( scale )
+	{
+	  EXCEPTION(EX_Invalid);
+	}
+    }
+
+  if (mod == 1)
+    {
+      /* 8 bit signed displacement */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_code_verify_area(1);
+      ea += (signed char) get_fs_byte((char *) (*fpu_eip));
+      RE_ENTRANT_CHECK_ON;
+      (*fpu_eip)++;
+    }
+  else if (mod == 2 || base_reg == 5) /* The second condition also has mod==0 */
+    {
+      /* 32 bit displacement */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_code_verify_area(4);
+      ea += (signed) get_fs_long((unsigned long *) (*fpu_eip));
+      RE_ENTRANT_CHECK_ON;
+      (*fpu_eip) += 4;
+    }
+
+  return ea;
+}
+
+
+/* Look up a vm86 mode segment register.
+   'segment' is a segment code; it is decremented to give the index
+   into reg_offset_vm86[].  The 16 bit register value is stored through
+   'selector', and the same value shifted left by four bits is
+   returned. */
+static unsigned long vm86_segment(unsigned char segment,
+				  unsigned short *selector)
+{
+  unsigned short seg_value;
+
+  segment--;
+#ifdef PARANOID
+  if ( segment > PREFIX_SS_ )
+    {
+      EXCEPTION(EX_INTERNAL|0x130);
+      math_abort(FPU_info,SIGSEGV);
+    }
+#endif /* PARANOID */
+
+  seg_value = VM86_REG_(segment);
+  *selector = seg_value;
+
+  return (unsigned long) seg_value << 4;
+}
+
+
+/* This should work for 16 and 32 bit protected mode. */
+/* Turn a segment code and offset into an address, using the segment's
+   descriptor.
+     FPU_modrm  only FPU_WRITE_BIT is examined, to detect a write
+     segment    segment code; decremented to index reg_offset_pm[]
+     selector   receives the selector read from the saved register
+     offset     offset within the segment
+   Returns the descriptor base address plus 'offset'.
+   Side effect: access_limit (declared outside this function) is set to
+   the number of bytes that may be accessed starting at the returned
+   address, capped at 255.  It is set to 0 when the address lies outside
+   the segment, when the segment is execute-only, or when a write is
+   attempted on a segment without write permission. */
+static long pm_address(unsigned char FPU_modrm, unsigned char segment,
+		       unsigned short *selector, long offset)
+{ 
+  struct desc_struct descriptor;
+  unsigned long base_address, limit, address, seg_top;
+
+  segment--;
+#ifdef PARANOID
+  if ( segment > PREFIX_SS_ )
+    {
+      EXCEPTION(EX_INTERNAL|0x132);
+      math_abort(FPU_info,SIGSEGV);
+    }
+#endif PARANOID
+
+  *selector = PM_REG_(segment);
+
+  descriptor = LDT_DESCRIPTOR(PM_REG_(segment));
+  base_address = SEG_BASE_ADDR(descriptor);
+  address = base_address + offset;
+  /* Address of the last byte covered by the segment limit */
+  limit = base_address
+	+ (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1;
+  /* The sum wrapped around: clamp to the top of the address space */
+  if ( limit < base_address ) limit = 0xffffffff;
+
+  if ( SEG_EXPAND_DOWN(descriptor) )
+    {
+      /* Expand-down segment: the accessible range is above 'limit',
+	 up to seg_top */
+      if ( SEG_G_BIT(descriptor) )
+	seg_top = 0xffffffff;
+      else
+	{
+	  seg_top = base_address + (1 << 20);
+	  if ( seg_top < base_address ) seg_top = 0xffffffff;
+	}
+      access_limit =
+	(address <= limit) || (address >= seg_top) ? 0 :
+	  ((seg_top-address) >= 255 ? 255 : seg_top-address);
+    }
+  else
+    {
+      /* Normal segment: the accessible range is base_address..limit */
+      access_limit =
+	(address > limit) || (address < base_address) ? 0 :
+	  ((limit-address) >= 254 ? 255 : limit-address+1);
+    }
+  /* Permission checks override whatever was computed above */
+  if ( SEG_EXECUTE_ONLY(descriptor) ||
+      (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) )
+    {
+      access_limit = 0;
+    }
+  return address;
+}
+
+
+/*
+       MOD R/M byte:  MOD == 3 has a special use for the FPU
+                      SIB byte used iff R/M = 100b
+
+       7   6   5   4   3   2   1   0
+       .....   .........   .........
+        MOD    OPCODE(2)     R/M
+
+
+       SIB byte
+
+       7   6   5   4   3   2   1   0
+       .....   .........   .........
+        SS      INDEX        BASE
+
+*/
+
+void *get_address(unsigned char FPU_modrm, unsigned long *fpu_eip,
+		  struct address *addr,
+/*		  unsigned short *selector, unsigned long *offset, */
+		  fpu_addr_modes addr_modes)
+{
+  unsigned char mod;
+  unsigned rm = FPU_modrm & 7;
+  long *cpu_reg_ptr;
+  int address = 0;     /* Initialized just to stop compiler warnings. */
+  /* Decodes a 32 bit ModR/M operand: fills in *addr and returns the address. */
+  /* Memory accessed via the cs selector is write protected
+     in `non-segmented' 32 bit protected mode. */
+  if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
+      && (addr_modes.override.segment == PREFIX_CS_) )
+    {
+      math_abort(FPU_info,SIGSEGV);
+    }
+
+  addr->selector = FPU_DS;   /* Default, for 32 bit non-segmented mode. */
+
+  mod = (FPU_modrm >> 6) & 3;
+
+  if (rm == 4 && mod != 3)   /* R/M == 100b: decode via the SIB byte */
+    {
+      address = sib(mod, fpu_eip);
+    }
+  else
+    {
+      cpu_reg_ptr = & REG_(rm);
+      switch (mod)
+	{
+	case 0:
+	  if (rm == 5)
+	    {
+	      /* Special case: disp32 */
+	      RE_ENTRANT_CHECK_OFF;
+	      FPU_code_verify_area(4);
+	      address = get_fs_long((unsigned long *) (*fpu_eip));
+	      (*fpu_eip) += 4;
+	      RE_ENTRANT_CHECK_ON;
+	      addr->offset = address;
+	      return (void *) address;   /* NOTE(review): returns before the segment handling below */
+	    }
+	  else
+	    {
+	      address = *cpu_reg_ptr;  /* Just return the contents
+					  of the cpu register */
+	      addr->offset = address;
+	      return (void *) address;   /* likewise skips the segment handling below */
+	    }
+	case 1:
+	  /* 8 bit signed displacement */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_code_verify_area(1);
+	  address = (signed char) get_fs_byte((char *) (*fpu_eip));
+	  RE_ENTRANT_CHECK_ON;
+	  (*fpu_eip)++;
+	  break;
+	case 2:
+	  /* 32 bit displacement */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_code_verify_area(4);
+	  address = (signed) get_fs_long((unsigned long *) (*fpu_eip));
+	  (*fpu_eip) += 4;
+	  RE_ENTRANT_CHECK_ON;
+	  break;
+	case 3:
+	  /* Not legal for the FPU */
+	  EXCEPTION(EX_Invalid);   /* last case: control leaves the switch here */
+	}
+      address += *cpu_reg_ptr;   /* displacement + contents of the register */
+    }
+
+  addr->offset = address;
+  /* Apply segmentation according to the default addressing mode. */
+  switch ( addr_modes.default_mode )
+    {
+    case 0:
+      break;   /* the offset is used as the address unchanged */
+    case VM86:
+      address += vm86_segment(addr_modes.override.segment,
+			      (unsigned short *)&(addr->selector));
+      break;
+    case PM16:
+    case SEG32:
+      address = pm_address(FPU_modrm, addr_modes.override.segment,
+			   (unsigned short *)&(addr->selector), address);
+      break;
+    default:
+      EXCEPTION(EX_INTERNAL|0x133);
+    }
+
+  return (void *)address;
+}
+
+
+void *get_address_16(unsigned char FPU_modrm, unsigned long *fpu_eip,
+		     struct address *addr,
+/*		     unsigned short *selector, unsigned long *offset, */
+		     fpu_addr_modes addr_modes)
+{
+  unsigned char mod;
+  unsigned rm = FPU_modrm & 7;
+  int address = 0;     /* Default used for mod == 0 */
+  /* Decodes a 16 bit ModR/M operand: fills in *addr and returns the address. */
+  /* Memory accessed via the cs selector is write protected
+     in `non-segmented' 32 bit protected mode. */
+  if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT)
+      && (addr_modes.override.segment == PREFIX_CS_) )
+    {
+      math_abort(FPU_info,SIGSEGV);
+    }
+
+  addr->selector = FPU_DS;   /* Default, for 32 bit non-segmented mode. */
+
+  mod = (FPU_modrm >> 6) & 3;
+  /* First fetch the displacement (if any) selected by mod. */
+  switch (mod)
+    {
+    case 0:
+      if (rm == 6)
+	{
+	  /* Special case: disp16 */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_code_verify_area(2);
+	  address = (unsigned short)get_fs_word((unsigned short *) (*fpu_eip));
+	  (*fpu_eip) += 2;
+	  RE_ENTRANT_CHECK_ON;
+	  goto add_segment;   /* no register is added to a bare disp16 */
+	}
+      break;
+    case 1:
+      /* 8 bit signed displacement */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_code_verify_area(1);
+      address = (signed char) get_fs_byte((signed char *) (*fpu_eip));
+      RE_ENTRANT_CHECK_ON;
+      (*fpu_eip)++;
+      break;
+    case 2:
+      /* 16 bit displacement */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_code_verify_area(2);
+      address = (unsigned) get_fs_word((unsigned short *) (*fpu_eip));
+      (*fpu_eip) += 2;
+      RE_ENTRANT_CHECK_ON;
+      break;
+    case 3:
+      /* Not legal for the FPU */
+      EXCEPTION(EX_Invalid);
+      break;
+    }
+  switch ( rm )   /* add the register(s) selected by R/M */
+    {
+    case 0:
+      address += FPU_info->___ebx + FPU_info->___esi;
+      break;
+    case 1:
+      address += FPU_info->___ebx + FPU_info->___edi;
+      break;
+    case 2:
+      address += FPU_info->___ebp + FPU_info->___esi;
+      if ( addr_modes.override.segment == PREFIX_DEFAULT )   /* ebp based: default to ss */
+	addr_modes.override.segment = PREFIX_SS_;
+      break;
+    case 3:
+      address += FPU_info->___ebp + FPU_info->___edi;
+      if ( addr_modes.override.segment == PREFIX_DEFAULT )
+	addr_modes.override.segment = PREFIX_SS_;
+      break;
+    case 4:
+      address += FPU_info->___esi;
+      break;
+    case 5:
+      address += FPU_info->___edi;
+      break;
+    case 6:
+      address += FPU_info->___ebp;
+      if ( addr_modes.override.segment == PREFIX_DEFAULT )
+	addr_modes.override.segment = PREFIX_SS_;
+      break;
+    case 7:
+      address += FPU_info->___ebx;
+      break;
+    }
+
+ add_segment:
+  address &= 0xffff;   /* keep only the low 16 bits of the offset */
+
+  addr->offset = address;
+  /* Apply segmentation according to the default addressing mode. */
+  switch ( addr_modes.default_mode )
+    {
+    case 0:
+      break;   /* the offset is used as the address unchanged */
+    case VM86:
+      address += vm86_segment(addr_modes.override.segment,
+			      (unsigned short *)&(addr->selector));
+      break;
+    case PM16:
+    case SEG32:
+      address = pm_address(FPU_modrm, addr_modes.override.segment,
+			   (unsigned short *)&(addr->selector), address);
+      break;
+    default:
+      EXCEPTION(EX_INTERNAL|0x131);
+    }
+
+  return (void *)address ;
+}
diff --git a/arch/i386/math-emu/load_store.c b/arch/i386/math-emu/load_store.c
new file mode 100644
index 000000000..6f0e167d6
--- /dev/null
+++ b/arch/i386/math-emu/load_store.c
@@ -0,0 +1,260 @@
+/*---------------------------------------------------------------------------+
+ |  load_store.c                                                             |
+ |                                                                           |
+ | This file contains most of the code to interpret the FPU instructions     |
+ | which load and store from user memory.                                    |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Note:                                                                     |
+ |    The file contains code which accesses user memory.                     |
+ |    Emulator static data may change when user memory is accessed, due to   |
+ |    other processes using the emulator while swapping is in progress.      |
+ +---------------------------------------------------------------------------*/
+
+#include <asm/segment.h>
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "control_w.h"
+
+
+#define _NONE_ 0   /* st0_ptr etc not needed */
+#define _REG0_ 1   /* Will be storing st(0) */
+#define _PUSH_ 3   /* Need to check for space to push onto stack */
+#define _null_ 4   /* Function illegal or not implemented */
+
+#define pop_0()	{ st0_ptr->tag = TW_Empty; top++; }   /* tag st0_ptr empty, then top++ */
+
+/* Stack handling class for each of the 32 load/store types (octal indices). */
+static unsigned char const type_table[32] = {
+  _PUSH_, _PUSH_, _PUSH_, _PUSH_,   /* 000-003: fld/fild loads */
+  _null_, _null_, _null_, _null_,   /* 004-007: rejected via FPU_illegal() */
+  _REG0_, _REG0_, _REG0_, _REG0_,   /* 010-013: fst/fist stores */
+  _REG0_, _REG0_, _REG0_, _REG0_,   /* 014-017: fstp/fistp stores */
+  _NONE_, _null_, _NONE_, _PUSH_,   /* 020 fldenv, 022 frstor, 023 fbld */
+  _NONE_, _PUSH_, _null_, _PUSH_,   /* 024 fldcw, 025 fld m80real, 027 fild m64int */
+  _NONE_, _null_, _NONE_, _REG0_,   /* 030 fstenv, 032 fsave, 033 fbstp */
+  _NONE_, _REG0_, _NONE_, _REG0_    /* 034 fstcw, 035 fstp m80real, 036 fstsw, 037 fistp m64int */
+  };
+
+unsigned char const data_sizes_16[32] = {   /* operand bytes per type; checked when mode is PM16 */
+  4,  4,  8,  2,  0,  0,  0,  0,   /* 000-007 */
+  4,  4,  8,  2,  4,  4,  8,  2,   /* 010-017 */
+  14, 0, 94, 10,  2, 10,  0,  8,  /* 020-027: 14 byte env, 94 byte state */
+  14, 0, 94, 10,  2, 10,  2,  8   /* 030-037 */
+};
+
+unsigned char const data_sizes_32[32] = {   /* operand bytes per type; checked when mode is SEG32 */
+  4,  4,  8,  2,  0,  0,  0,  0,   /* 000-007 */
+  4,  4,  8,  2,  4,  4,  8,  2,   /* 010-017 */
+  28, 0,108, 10,  2, 10,  0,  8,  /* 020-027: 28 byte env, 108 byte state */
+  28, 0,108, 10,  2, 10,  2,  8   /* 030-037 */
+};
+
+int load_store_instr(unsigned char type, fpu_addr_modes addr_modes,
+		     void *data_address)
+{
+  FPU_REG loaded_data;
+  FPU_REG *st0_ptr;
+  /* Executes the load/store instruction selected by `type' on data_address. */
+  st0_ptr = NULL;    /* Initialized just to stop compiler warnings. */
+  /* Returns 1 from cases 020, 022, 024, 030, 032, 034 and 036; otherwise 0. */
+  if ( addr_modes.default_mode & PROTECTED )
+    {
+      /* Abort if fewer bytes are accessible than this operand needs. */
+      if ( addr_modes.default_mode == SEG32 )
+	{
+	  if ( access_limit < data_sizes_32[type] )
+	    math_abort(FPU_info,SIGSEGV);
+	}
+      else if ( addr_modes.default_mode == PM16 )
+	{
+	  if ( access_limit < data_sizes_16[type] )
+	    math_abort(FPU_info,SIGSEGV);
+	}
+#ifdef PARANOID
+      else
+	EXCEPTION(EX_INTERNAL|0x140);
+#endif /* PARANOID */
+    }
+  /* Set up st0_ptr (pushing if required) for this instruction class. */
+  switch ( type_table[type] )
+    {
+    case _NONE_:
+      break;
+    case _REG0_:
+      st0_ptr = &st(0);       /* Some of these instructions pop after
+				 storing */
+      break;
+    case _PUSH_:
+      {
+	st0_ptr = &st(-1);
+	if ( st0_ptr->tag != TW_Empty )
+	  { stack_overflow(); return 0; }
+	top--;   /* complete the push */
+      }
+      break;
+    case _null_:
+      FPU_illegal();
+      return 0;
+#ifdef PARANOID
+    default:
+      EXCEPTION(EX_INTERNAL|0x141);
+      return 0;
+#endif /* PARANOID */
+    }
+
+  switch ( type )   /* case labels are octal */
+    {
+    case 000:       /* fld m32real */
+      clear_C1();
+      reg_load_single((float *)data_address, &loaded_data);
+      if ( (loaded_data.tag == TW_NaN) &&
+	  real_2op_NaN(&loaded_data, &loaded_data, &loaded_data) )
+	{
+	  top++;   /* undo the push done above */
+	  break;
+	}
+      reg_move(&loaded_data, st0_ptr);
+      break;
+    case 001:      /* fild m32int */
+      clear_C1();
+      reg_load_int32((long *)data_address, st0_ptr);
+      break;
+    case 002:      /* fld m64real */
+      clear_C1();
+      reg_load_double((double *)data_address, &loaded_data);
+      if ( (loaded_data.tag == TW_NaN) &&
+	  real_2op_NaN(&loaded_data, &loaded_data, &loaded_data) )
+	{
+	  top++;   /* undo the push done above */
+	  break;
+	}
+      reg_move(&loaded_data, st0_ptr);
+      break;
+    case 003:      /* fild m16int */
+      clear_C1();
+      reg_load_int16((short *)data_address, st0_ptr);
+      break;
+    case 010:      /* fst m32real */
+      clear_C1();
+      reg_store_single((float *)data_address, st0_ptr);
+      break;
+    case 011:      /* fist m32int */
+      clear_C1();
+      reg_store_int32((long *)data_address, st0_ptr);
+      break;
+    case 012:     /* fst m64real */
+      clear_C1();
+      reg_store_double((double *)data_address, st0_ptr);
+      break;
+    case 013:     /* fist m16int */
+      clear_C1();
+      reg_store_int16((short *)data_address, st0_ptr);
+      break;
+    case 014:     /* fstp m32real */
+      clear_C1();
+      if ( reg_store_single((float *)data_address, st0_ptr) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 015:     /* fistp m32int */
+      clear_C1();
+      if ( reg_store_int32((long *)data_address, st0_ptr) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 016:     /* fstp m64real */
+      clear_C1();
+      if ( reg_store_double((double *)data_address, st0_ptr) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 017:     /* fistp m16int */
+      clear_C1();
+      if ( reg_store_int16((short *)data_address, st0_ptr) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 020:     /* fldenv  m14/28byte */
+      fldenv(addr_modes, (char *)data_address);
+      /* Ensure that the values just loaded are not changed by
+	 fix-up operations. */
+      return 1;
+    case 022:     /* frstor m94/108byte */
+      frstor(addr_modes, (char *)data_address);
+      /* Ensure that the values just loaded are not changed by
+	 fix-up operations. */
+      return 1;
+    case 023:     /* fbld m80dec */
+      clear_C1();
+      reg_load_bcd((char *)data_address, st0_ptr);
+      break;
+    case 024:     /* fldcw */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_verify_area(VERIFY_READ, data_address, 2);
+      control_word = get_fs_word((unsigned short *) data_address);
+      RE_ENTRANT_CHECK_ON;
+      if ( partial_status & ~control_word & CW_Exceptions )   /* a flagged exception is now unmasked */
+	partial_status |= (SW_Summary | SW_Backward);
+      else
+	partial_status &= ~(SW_Summary | SW_Backward);
+#ifdef PECULIAR_486
+      control_word |= 0x40;  /* An 80486 appears to always set this bit */
+#endif /* PECULIAR_486 */
+      return 1;
+    case 025:      /* fld m80real */
+      clear_C1();
+      reg_load_extended((long double *)data_address, st0_ptr);
+      break;
+    case 027:      /* fild m64int */
+      clear_C1();
+      reg_load_int64((long long *)data_address, st0_ptr);
+      break;
+    case 030:     /* fstenv  m14/28byte */
+      fstenv(addr_modes, (char *)data_address);
+      return 1;
+    case 032:      /* fsave */
+      fsave(addr_modes, (char *)data_address);
+      return 1;
+    case 033:      /* fbstp m80dec */
+      clear_C1();
+      if ( reg_store_bcd((char *)data_address, st0_ptr) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 034:      /* fstcw m16int */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_verify_area(VERIFY_WRITE,data_address,2);
+      put_fs_word(control_word, (short *) data_address);
+      RE_ENTRANT_CHECK_ON;
+      return 1;
+    case 035:      /* fstp m80real */
+      clear_C1();
+      if ( reg_store_extended((long double *)data_address, st0_ptr) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    case 036:      /* fstsw m2byte */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_verify_area(VERIFY_WRITE,data_address,2);
+      put_fs_word(status_word(),(short *) data_address);
+      RE_ENTRANT_CHECK_ON;
+      return 1;
+    case 037:      /* fistp m64int */
+      clear_C1();
+      if ( reg_store_int64((long long *)data_address, st0_ptr) )
+	pop_0();  /* pop only if the number was actually stored
+		     (see the 80486 manual p16-28) */
+      break;
+    }
+  return 0;
+}
diff --git a/arch/i386/math-emu/mul_Xsig.S b/arch/i386/math-emu/mul_Xsig.S
new file mode 100644
index 000000000..1d88d4466
--- /dev/null
+++ b/arch/i386/math-emu/mul_Xsig.S
@@ -0,0 +1,182 @@
+/*---------------------------------------------------------------------------+
+ |  mul_Xsig.S                                                               |
+ |                                                                           |
+ | Multiply a 12 byte fixed point number by another fixed point number.      |
+ |                                                                           |
+ | Copyright (C) 1992,1994                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void mul32_Xsig(Xsig *x, unsigned b)                                    |
+ |                                                                           |
+ |   void mul64_Xsig(Xsig *x, unsigned long long *b)                         |
+ |                                                                           |
+ |   void mul_Xsig_Xsig(Xsig *x, unsigned *b)                                |
+ |                                                                           |
+ | The result is neither rounded nor normalized, and the ls bit or so may    |
+ | be wrong.                                                                 |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+	.file	"mul_Xsig.S"
+
+
+#include "fpu_asm.h"
+
+.text
+	.align 2,144
+.globl _mul32_Xsig
+_mul32_Xsig:			/* x = bits 32..127 of the 128 bit product x * b */
+	pushl %ebp
+	movl %esp,%ebp
+	subl $16,%esp		/* 96 bit accumulator lives at -12..-1(%ebp) */
+	pushl %esi
+
+	movl PARAM1,%esi	/* esi = x, pointer to the Xsig */
+	movl PARAM2,%ecx	/* ecx = b, the 32 bit multiplier */
+
+	xor %eax,%eax
+	movl %eax,-4(%ebp)
+	movl %eax,-8(%ebp)
+
+	movl (%esi),%eax        /* lsl of Xsig */
+	mull %ecx		/* b */
+	movl %edx,-12(%ebp)	/* keep only the high half of this product */
+
+	movl 4(%esi),%eax	/* midl of Xsig */
+	mull %ecx		/* b */
+	addl %eax,-12(%ebp)
+	adcl %edx,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull %ecx		/* b */
+	addl %eax,-8(%ebp)
+	adcl %edx,-4(%ebp)
+
+	movl -12(%ebp),%eax	/* copy the accumulator back into *x */
+	movl %eax,(%esi)
+	movl -8(%ebp),%eax
+	movl %eax,4(%esi)
+	movl -4(%ebp),%eax
+	movl %eax,8(%esi)
+
+	popl %esi
+	leave
+	ret
+
+
+	.align 2,144
+.globl _mul64_Xsig
+_mul64_Xsig:			/* x = upper 96 bits of x * (*b), low-order carries dropped */
+	pushl %ebp
+	movl %esp,%ebp
+	subl $16,%esp		/* 96 bit accumulator lives at -12..-1(%ebp) */
+	pushl %esi
+
+	movl PARAM1,%esi	/* esi = x, pointer to the Xsig */
+	movl PARAM2,%ecx	/* ecx = pointer to the 64 bit multiplier b */
+
+	xor %eax,%eax
+	movl %eax,-4(%ebp)
+	movl %eax,-8(%ebp)
+	/* The lsl(x) * lsl(b) partial product is never formed. */
+	movl (%esi),%eax        /* lsl of Xsig */
+	mull 4(%ecx)		/* msl of b */
+	movl %edx,-12(%ebp)	/* high half only */
+
+	movl 4(%esi),%eax	/* midl of Xsig */
+	mull (%ecx)		/* lsl of b */
+	addl %edx,-12(%ebp)	/* high half only */
+	adcl $0,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 4(%esi),%eax	/* midl of Xsig */
+	mull 4(%ecx)		/* msl of b */
+	addl %eax,-12(%ebp)
+	adcl %edx,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull (%ecx)		/* lsl of b */
+	addl %eax,-12(%ebp)
+	adcl %edx,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull 4(%ecx)		/* msl of b */
+	addl %eax,-8(%ebp)
+	adcl %edx,-4(%ebp)
+
+	movl -12(%ebp),%eax	/* copy the accumulator back into *x */
+	movl %eax,(%esi)
+	movl -8(%ebp),%eax
+	movl %eax,4(%esi)
+	movl -4(%ebp),%eax
+	movl %eax,8(%esi)
+
+	popl %esi
+	leave
+	ret
+
+
+
+	.align 2,144
+.globl _mul_Xsig_Xsig
+_mul_Xsig_Xsig:			/* x = upper 96 bits of x * (*b), low-order carries dropped */
+	pushl %ebp
+	movl %esp,%ebp
+	subl $16,%esp		/* 96 bit accumulator lives at -12..-1(%ebp) */
+	pushl %esi
+
+	movl PARAM1,%esi	/* esi = x, pointer to the destination Xsig */
+	movl PARAM2,%ecx	/* ecx = b, pointer to the 12 byte multiplier */
+
+	xor %eax,%eax
+	movl %eax,-4(%ebp)
+	movl %eax,-8(%ebp)
+	/* Only partial products reaching the upper 96 bits are formed. */
+	movl (%esi),%eax        /* lsl of Xsig */
+	mull 8(%ecx)		/* msl of b */
+	movl %edx,-12(%ebp)	/* high half only */
+
+	movl 4(%esi),%eax	/* midl of Xsig */
+	mull 4(%ecx)		/* midl of b */
+	addl %edx,-12(%ebp)	/* high half only */
+	adcl $0,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull (%ecx)		/* lsl of b */
+	addl %edx,-12(%ebp)	/* high half only */
+	adcl $0,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 4(%esi),%eax	/* midl of Xsig */
+	mull 8(%ecx)		/* msl of b */
+	addl %eax,-12(%ebp)
+	adcl %edx,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull 4(%ecx)		/* midl of b */
+	addl %eax,-12(%ebp)
+	adcl %edx,-8(%ebp)
+	adcl $0,-4(%ebp)
+
+	movl 8(%esi),%eax	/* msl of Xsig */
+	mull 8(%ecx)		/* msl of b */
+	addl %eax,-8(%ebp)
+	adcl %edx,-4(%ebp)
+
+	movl -12(%ebp),%edx	/* copy the accumulator back into *x */
+	movl %edx,(%esi)
+	movl -8(%ebp),%edx
+	movl %edx,4(%esi)
+	movl -4(%ebp),%edx
+	movl %edx,8(%esi)
+
+	popl %esi
+	leave
+	ret
+
diff --git a/arch/i386/math-emu/poly.h b/arch/i386/math-emu/poly.h
new file mode 100644
index 000000000..397cb9e3e
--- /dev/null
+++ b/arch/i386/math-emu/poly.h
@@ -0,0 +1,116 @@
+/*---------------------------------------------------------------------------+
+ |  poly.h                                                                   |
+ |                                                                           |
+ |  Header file for the FPU-emu poly*.c source files.                        |
+ |                                                                           |
+ | Copyright (C) 1994                                                        |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | Declarations and definitions for functions operating on Xsig (12-byte     |
+ | extended-significand) quantities.                                         |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _POLY_H
+#define _POLY_H
+
+/* This 12-byte structure is used to improve the accuracy of computation
+   of transcendental functions.
+   Intended to be used to get results better than 8-byte computation
+   allows. 9-byte would probably be sufficient.
+   */
+typedef struct {
+  unsigned long lsw;    /* least significant word, at offset 0 */
+  unsigned long midw;   /* middle word, at offset 4 */
+  unsigned long msw;    /* most significant word, at offset 8 */
+} Xsig;
+
+asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b,
+		      unsigned long long *result);
+asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x,
+				const unsigned long long terms[], const int n);
+
+asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult);
+asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult);
+asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult);
+
+asmlinkage void shr_Xsig(Xsig *, const int n);
+asmlinkage int round_Xsig(Xsig *);
+asmlinkage int norm_Xsig(Xsig *);
+asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest);
+
+/* Most significant 32 bits of a long long lvalue (word [1]); x is not parenthesized */
+#define LL_MSW(x)     (((unsigned long *)&x)[1])
+
+/* Initializer for an Xsig, given as msw, midw, lsw (stored in struct order lsw, midw, msw) */
+#define MK_XSIG(a,b,c)     { c, b, a }
+
+/* The 8 ms bytes (midw and msw) of an Xsig lvalue as a long long; x is not parenthesized */
+#define XSIG_LL(x)         (*(unsigned long long *)&x.midw)
+
+
+/*
+   Need to run gcc with optimizations on to get these to
+   actually be in-line.
+   */
+
+/* Multiply two fixed-point 32 bit numbers; *out receives the high 32 bits of the product. */
+extern inline void mul_32_32(const unsigned long arg1,
+			     const unsigned long arg2,
+			     unsigned long *out)
+{
+  asm volatile ("movl %1,%%eax; mull %2; movl %%edx,%0" \
+		:"=g" (*out) \
+		:"g" (arg1), "g" (arg2) \
+		:"ax","dx");
+}
+
+
+/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
+extern inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
+{  /* 96 bit add in place: addl on lsw, then adcl on midw and msw */
+  asm volatile ("movl %1,%%edi; movl %2,%%esi;
+                 movl (%%esi),%%eax; addl %%eax,(%%edi);
+                 movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);
+                 movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);"
+                 :"=g" (*dest):"g" (dest), "g" (x2)
+                 :"ax","si","di");
+}
+
+
+/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */
+/* Note: the constraints in the asm statement didn't always work properly
+   with gcc 2.5.8.  Changing from using edi to using ecx got around the
+   problem, but keep fingers crossed! */
+extern inline int add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp)
+{  /* NOTE(review): no C return statement; the asm only leaves 1 (carry) or 0 in eax */
+  asm volatile ("movl %2,%%ecx; movl %3,%%esi;
+                 movl (%%esi),%%eax; addl %%eax,(%%ecx);
+                 movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);
+                 movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);
+                 jnc 0f;
+		 rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)
+                 movl %4,%%ecx; incl (%%ecx)
+                 movl $1,%%eax; jmp 1f;
+                 0: xorl %%eax,%%eax;
+                 1:"
+		:"=g" (*exp), "=g" (*dest)
+		:"g" (dest), "g" (x2), "g" (exp)
+		:"cx","si","ax");   /* on carry: sum rotated right one bit through carry, *exp incremented */
+}
+
+
+/* Negate (subtract from 1.0) the 12 byte Xsig */
+/* This is faster in a loop on my 386 than using the "neg" instruction. */
+extern inline void negate_Xsig(Xsig *x)
+{  /* computes 0 - *x across all three words in place (subl, then sbbl twice) */
+  asm volatile("movl %1,%%esi; "
+               "xorl %%ecx,%%ecx; "
+               "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi); "
+               "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi); "
+               "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi); "
+               :"=g" (*x):"g" (x):"si","ax","cx");
+}
+
+#endif _POLY_H
diff --git a/arch/i386/math-emu/poly_2xm1.c b/arch/i386/math-emu/poly_2xm1.c
new file mode 100644
index 000000000..f7c585d60
--- /dev/null
+++ b/arch/i386/math-emu/poly_2xm1.c
@@ -0,0 +1,152 @@
+/*---------------------------------------------------------------------------+
+ |  poly_2xm1.c                                                              |
+ |                                                                           |
+ | Function to compute 2^x-1 by a polynomial approximation.                  |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+#include "poly.h"
+
+
+#define	HIPOWER	11
+static const unsigned long long lterms[HIPOWER] =   /* 64 bit polynomial coefficients */
+{
+  0x0000000000000000LL,  /* This term done separately as 12 bytes */
+  0xf5fdeffc162c7543LL,
+  0x1c6b08d704a0bfa6LL,
+  0x0276556df749cc21LL,
+  0x002bb0ffcf14f6b8LL,
+  0x0002861225ef751cLL,
+  0x00001ffcbfcd5422LL,
+  0x00000162c005d5f1LL,
+  0x0000000da96ccb1bLL,
+  0x0000000078d1b897LL,
+  0x000000000422b029LL
+};
+/* The leading term as 12 bytes; 0.b17217f7d1cf79ab... is close to ln(2). */
+static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194);
+
+/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0,
+   These numbers are 2^(1/4), 2^(1/2), and 2^(3/4)
+ */
+static const Xsig shiftterm0 = MK_XSIG(0, 0, 0);   /* placeholder: index 0 means no shift */
+static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318);
+static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3);
+static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9);
+/* Indexed by the `shift' slice number (0..3) computed in poly_2xm1(). */
+static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1,
+				     &shiftterm2, &shiftterm3 };
+
+
+/*--- poly_2xm1() -----------------------------------------------------------+
+ | Requires a TW_Valid argument with magnitude < 1.0; returns 0 on success.  |
+ +---------------------------------------------------------------------------*/
+int	poly_2xm1(FPU_REG const *arg, FPU_REG *result)
+{
+  long int               exponent, shift;
+  unsigned long long     Xll;
+  Xsig                   accumulator, Denom, argSignif;
+
+  /* Unbiased exponent of the argument. */
+  exponent = arg->exp - EXP_BIAS;
+
+#ifdef PARANOID
+  if (   (exponent >= 0)    	/* Don't want a |number| >= 1.0 */
+      || (arg->tag != TW_Valid) )
+    {
+      /* Number too large (magnitude >= 1.0), or not Valid. */
+      EXCEPTION(EX_INTERNAL|0x127);
+      return 1;
+    }
+#endif /* PARANOID */
+  /* Significand in the upper 8 bytes of argSignif, with a 64 bit copy in Xll. */
+  argSignif.lsw = 0;
+  XSIG_LL(argSignif) = Xll = significand(arg);
+
+  if ( exponent == -1 )
+    {
+      shift = (argSignif.msw & 0x40000000) ? 3 : 2;   /* slice: >= 0.75 or >= 0.5 */
+      /* subtract 0.5 or 0.75 */
+      exponent -= 2;
+      XSIG_LL(argSignif) <<= 2;
+      Xll <<= 2;
+    }
+  else if ( exponent == -2 )
+    {
+      shift = 1;
+      /* subtract 0.25 */
+      exponent--;
+      XSIG_LL(argSignif) <<= 1;
+      Xll <<= 1;
+    }
+  else
+    shift = 0;
+
+  if ( exponent < -2 )
+    {
+      /* Shift the argument right by the required places. */
+      if ( shrx(&Xll, -2-exponent) >= 0x80000000U )
+	Xll++;	/* round up */
+    }
+  /* Higher order terms first; the 12 byte leading term is added below. */
+  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+  polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1);
+  mul_Xsig_Xsig(&accumulator, &argSignif);
+  shr_Xsig(&accumulator, 3);
+
+  mul_Xsig_Xsig(&argSignif, &hiterm);   /* The leading term */
+  add_two_Xsig(&accumulator, &argSignif, &exponent);
+
+  if ( shift )
+    {
+      /* The argument is large, use the identity:
+	 f(x+a) = (f(a) + 1) * (f(x) + 1) - 1;
+	 */
+      shr_Xsig(&accumulator, - exponent);
+      accumulator.msw |= 0x80000000;      /* add 1.0 */
+      mul_Xsig_Xsig(&accumulator, shiftterm[shift]);   /* times 2^a = f(a) + 1 */
+      accumulator.msw &= 0x3fffffff;      /* subtract 1.0 */
+      exponent = 1;
+    }
+
+  if ( arg->sign != SIGN_POS )
+    {
+      /* The argument is negative, use the identity:
+	     f(-x) = -f(x) / (1 + f(x))
+	 */
+      Denom.lsw = accumulator.lsw;
+      XSIG_LL(Denom) = XSIG_LL(accumulator);
+      if ( exponent < 0 )
+	shr_Xsig(&Denom, - exponent);
+      else if ( exponent > 0 )
+	{
+	  /* exponent must be 1 here */
+	  XSIG_LL(Denom) <<= 1;
+	  if ( Denom.lsw & 0x80000000 )   /* carry the top bit of lsw upwards */
+	    XSIG_LL(Denom) |= 1;
+	  (Denom.lsw) <<= 1;
+	}
+      Denom.msw |= 0x80000000;      /* add 1.0 */
+      div_Xsig(&accumulator, &Denom, &accumulator);
+    }
+
+  /* Convert to 64 bit signed-compatible */
+  exponent += round_Xsig(&accumulator);
+
+  significand(result) = XSIG_LL(accumulator);
+  result->tag = TW_Valid;
+  result->exp = exponent + EXP_BIAS;
+  result->sign = arg->sign;   /* the result takes the sign of the argument */
+
+  return 0;
+
+}
diff --git a/arch/i386/math-emu/poly_atan.c b/arch/i386/math-emu/poly_atan.c
new file mode 100644
index 000000000..6edca625f
--- /dev/null
+++ b/arch/i386/math-emu/poly_atan.c
@@ -0,0 +1,197 @@
+/*---------------------------------------------------------------------------+
+ |  poly_atan.c                                                              |
+ |                                                                           |
+ | Compute the arctan of a FPU_REG, using a polynomial approximation.        |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "control_w.h"
+#include "poly.h"
+
+
+/*
+ * Coefficient tables for poly_atan().  Each table entry is a 64 bit fixed
+ * point value; the tables are handed to polynomial_Xsig() and evaluated
+ * in argSqSq (the square of the squared, reduced argument).
+ */
+
+/* Polynomial whose value is subtracted: poly_atan() negates the
+   accumulator, accumulates this polynomial, and negates again. */
+#define	HIPOWERon	6	/* odd poly, negative terms */
+static const unsigned long long oddnegterms[HIPOWERon] =
+{
+  0x0000000000000000LL, /* Dummy (not for - 1.0) */
+  0x015328437f756467LL,
+  0x0005dda27b73dec6LL,
+  0x0000226bf2bfb91aLL,
+  0x000000ccc439c5f7LL,
+  0x0000000355438407LL
+} ;
+
+/* Polynomial whose value is multiplied by argSq and added. */
+#define	HIPOWERop	6	/* odd poly, positive terms */
+static const unsigned long long oddplterms[HIPOWERop] =
+{
+/*  0xaaaaaaaaaaaaaaabLL,  transferred to fixedpterm */
+  0x0db55a71875c9ac2LL,
+  0x0029fce2d67880b0LL,
+  0x0000dfd3908b4596LL,
+  0x00000550fd61dab4LL,
+  0x0000001c9422b3f9LL,
+  0x000000003e3301e1LL
+};
+
+/* Multiplied into the squared argument to build the divisor
+   (accumulatore) of the rational approximation in poly_atan(). */
+static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL;
+
+/* Constant term added to the numerator after both polynomials have been
+   accumulated (the leading coefficient moved out of oddplterms[]). */
+static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa);
+
+/* 96 bit significand of pi.  poly_atan() pairs it with exponents -1, 0
+   and 1 to form pi/4, pi/2 and pi respectively. */
+static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b);
+
+
+/*--- poly_atan() -----------------------------------------------------------+
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+/*
+ * Arctangent of the quotient of two registers by polynomial approximation.
+ *
+ * arg1, arg2: operands.  Their sign fields are saved locally and then
+ *             overwritten with SIGN_POS; they are NOT restored.
+ * result:     receives the answer (significand, exp, tag and sign).
+ *
+ * Outline:
+ *   1. Form the quotient of the two magnitudes.  compare(arg2) selects
+ *      which operand becomes the numerator ("inverted" records the
+ *      choice).  Presumably compare() tests the stack top against arg2 so
+ *      that the quotient never exceeds 1.0 -- TODO confirm in compare().
+ *   2. If the quotient x exceeds sqrt(2)-1 it is replaced by
+ *      (1-x)/(1+x) ("transformed"); step 4 undoes this with pi/4.
+ *   3. Evaluate the fixed point rational approximation.
+ *   4. Undo the reductions: pi/4 - r if transformed, pi/2 - r if
+ *      inverted, pi - r if arg1 was negative on entry.
+ *   5. Round and store.  The result takes the original sign of arg2.
+ *
+ * With PARANOID defined, a quotient whose exponent is >= 0 but which is
+ * not exactly 1.0 raises EX_INTERNAL|0x104 and the function returns
+ * without writing *result.
+ */
+void	poly_atan(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *result)
+{
+  char		        transformed, inverted,
+                        sign1 = arg1->sign, sign2 = arg2->sign;
+  long int   		exponent, dummy_exp;
+  Xsig                  accumulator, Numer, Denom, accumulatore, argSignif,
+                        argSq, argSqSq;
+
+
+  /* Work on magnitudes only; the saved signs are applied at the end. */
+  arg1->sign = arg2->sign = SIGN_POS;
+  if ( (compare(arg2) & ~COMP_Denormal) == COMP_A_lt_B )
+    {
+      /* Use arg1/arg2 and remember to take pi/2 - atan() later. */
+      inverted = 1;
+      exponent = arg1->exp - arg2->exp;
+      Numer.lsw = Denom.lsw = 0;
+      XSIG_LL(Numer) = significand(arg1);
+      XSIG_LL(Denom) = significand(arg2);
+    }
+  else
+    {
+      /* Use arg2/arg1 directly. */
+      inverted = 0;
+      exponent = arg2->exp - arg1->exp;
+      Numer.lsw = Denom.lsw = 0;
+      XSIG_LL(Numer) = significand(arg2);
+      XSIG_LL(Denom) = significand(arg1);
+     }
+  div_Xsig(&Numer, &Denom, &argSignif);
+  exponent += norm_Xsig(&argSignif);
+
+  if ( (exponent >= -1)
+      || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) )
+    {
+      /* The argument is greater than sqrt(2)-1 (=0.414213562...) */
+      /* Convert the argument by an identity for atan */
+      transformed = 1;
+
+      if ( exponent >= 0 )
+	{
+#ifdef PARANOID
+	  if ( !( (exponent == 0) && 
+		 (argSignif.lsw == 0) && (argSignif.midw == 0) &&
+		 (argSignif.msw == 0x80000000) ) )
+	    {
+	      EXCEPTION(EX_INTERNAL|0x104);  /* There must be a logic error */
+	      return;
+	    }
+#endif /* PARANOID */
+	  /* The quotient is exactly 1.0, so (1-x)/(1+x) is 0.0.  Only msw
+	     needs clearing: the other words are already zero here. */
+	  argSignif.msw = 0;   /* Make the transformed arg -> 0.0 */
+	}
+      else
+	{
+	  Numer.lsw = Denom.lsw = argSignif.lsw;
+	  XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif);
+
+	  /* Numer = 1 - x: align x as a pure fraction, then negate. */
+	  if ( exponent < -1 )
+	    shr_Xsig(&Numer, -1-exponent);
+	  negate_Xsig(&Numer);
+      
+	  /* Denom = (1 + x)/2: one extra shift, then set the top bit. */
+	  shr_Xsig(&Denom, -exponent);
+	  Denom.msw |= 0x80000000;
+      
+	  div_Xsig(&Numer, &Denom, &argSignif);
+
+	  /* The -1 accounts for the halved denominator. */
+	  exponent = -1 + norm_Xsig(&argSignif);
+	}
+    }
+  else
+    {
+      transformed = 0;
+    }
+
+  /* argSq = arg^2 and argSqSq = arg^4 (significands only, so far). */
+  argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw;
+  argSq.msw = argSignif.msw;
+  mul_Xsig_Xsig(&argSq, &argSq);
+  
+  argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw;
+  mul_Xsig_Xsig(&argSqSq, &argSqSq);
+
+  /* Keep an unshifted copy of arg^2 for the denominator below. */
+  accumulatore.lsw = argSq.lsw;
+  XSIG_LL(accumulatore) = XSIG_LL(argSq);
+
+  shr_Xsig(&argSq, 2*(-1-exponent-1));
+  shr_Xsig(&argSqSq, 4*(-1-exponent-1));
+
+  /* Now have argSq etc with binary point at the left
+     .1xxxxxxxx */
+
+  /* Do the basic fixed point polynomial evaluation */
+  /* Numerator: fixedpterm + argSq*P_pos(argSqSq) - P_neg(argSqSq).  The
+     negate / accumulate / negate sequence performs the subtraction. */
+  accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+  polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq),
+		   oddplterms, HIPOWERop-1);
+  mul64_Xsig(&accumulator, &XSIG_LL(argSq));
+  negate_Xsig(&accumulator);
+  polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1);
+  negate_Xsig(&accumulator);
+  add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp);
+
+  /* Denominator: 1.0 + denomterm * arg^2 (top bit supplies the 1.0). */
+  mul64_Xsig(&accumulatore, &denomterm);
+  shr_Xsig(&accumulatore, 1 + 2*(-1-exponent));
+  accumulatore.msw |= 0x80000000;
+
+  div_Xsig(&accumulator, &accumulatore, &accumulator);
+
+  /* Multiply the ratio by arg * arg^2, scale, and subtract from arg. */
+  mul_Xsig_Xsig(&accumulator, &argSignif);
+  mul_Xsig_Xsig(&accumulator, &argSq);
+
+  shr_Xsig(&accumulator, 3);
+  negate_Xsig(&accumulator);
+  add_Xsig_Xsig(&accumulator, &argSignif);
+
+  /* Undo the argument reductions.  Each step first aligns the
+     accumulator with pi_signif at the exponent of the constant used. */
+  if ( transformed )
+    {
+      /* compute pi/4 - accumulator */
+      shr_Xsig(&accumulator, -1-exponent);
+      negate_Xsig(&accumulator);
+      add_Xsig_Xsig(&accumulator, &pi_signif);
+      exponent = -1;
+    }
+
+  if ( inverted )
+    {
+      /* compute pi/2 - accumulator */
+      shr_Xsig(&accumulator, -exponent);
+      negate_Xsig(&accumulator);
+      add_Xsig_Xsig(&accumulator, &pi_signif);
+      exponent = 0;
+    }
+
+  if ( sign1 )
+    {
+      /* compute pi - accumulator */
+      shr_Xsig(&accumulator, 1 - exponent);
+      negate_Xsig(&accumulator);
+      add_Xsig_Xsig(&accumulator, &pi_signif);
+      exponent = 1;
+    }
+
+  /* Round to 64 bits; round_Xsig() returns the exponent adjustment. */
+  exponent += round_Xsig(&accumulator);
+  significand(result) = XSIG_LL(accumulator);
+  result->exp = exponent + EXP_BIAS;
+  result->tag = TW_Valid;
+  result->sign = sign2;
+
+}
diff --git a/arch/i386/math-emu/poly_l2.c b/arch/i386/math-emu/poly_l2.c
new file mode 100644
index 000000000..1677f4aff
--- /dev/null
+++ b/arch/i386/math-emu/poly_l2.c
@@ -0,0 +1,255 @@
+/*---------------------------------------------------------------------------+
+ |  poly_l2.c                                                                |
+ |                                                                           |
+ | Compute the base 2 log of a FPU_REG, using a polynomial approximation.    |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+#include "poly.h"
+
+
+
+/* Kernel shared by poly_l2() and poly_l2p1(); defined at the end of this
+   file.  Writes a 96 bit significand to *accum_result and the matching
+   exponent to *expon. */
+static void log2_kernel(FPU_REG const *arg,
+			Xsig *accum_result, long int *expon);
+
+
+/*--- poly_l2() -------------------------------------------------------------+
+ |   Base 2 logarithm by a polynomial approximation.                         |
+ +---------------------------------------------------------------------------*/
+/*
+ * Store  y * log2(arg)  in *result.
+ *
+ * arg:    value whose logarithm is taken.  Only exp and the significand
+ *         are read; presumably the caller guarantees a valid positive
+ *         number -- TODO confirm at the call site.
+ * y:      multiplier; its significand, exp and sign are read, it is
+ *         never written.
+ * result: receives the product.  May be the same register as y: every
+ *         field of y is read before the corresponding field of result
+ *         is written.
+ *
+ * Method: arg is split into 2^exponent * m with sqrt(2)/2 < m < sqrt(2);
+ * log2(m) comes from log2_kernel() applied to m - 1, and the integer
+ * exponent is added to it in fixed point before multiplying by y.
+ */
+void	poly_l2(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result)
+{
+  long int	       exponent, expon, expon_expon;
+  Xsig                 accumulator, expon_accum, yaccum;
+  char		       sign;
+  FPU_REG              x;
+
+
+  exponent = arg->exp - EXP_BIAS;
+
+  /* From arg, make a number > sqrt(2)/2 and < sqrt(2) */
+  if ( arg->sigh > (unsigned)0xb504f334 )
+    {
+      /* Treat as  sqrt(2)/2 < arg < 1 */
+      /* x = -(1 - m): negate the significand and bump the exponent. */
+      significand(&x) = - significand(arg);
+      x.sign = SIGN_NEG;
+      x.tag = TW_Valid;
+      x.exp = EXP_BIAS-1;
+      exponent++;
+      normalize(&x);
+    }
+  else
+    {
+      /* Treat as  1 <= arg < sqrt(2) */
+      /* x = m - 1: drop the explicit leading one bit. */
+      x.sigh = arg->sigh - 0x80000000;
+      x.sigl = arg->sigl;
+      x.sign = SIGN_POS;
+      x.tag = TW_Valid;
+      x.exp = EXP_BIAS;
+      normalize(&x);
+    }
+
+  if ( x.tag == TW_Zero )
+    {
+      /* m is exactly 1.0, so log2(m) contributes nothing. */
+      expon = 0;
+      accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+    }
+  else
+    {
+      log2_kernel(&x, &accumulator, &expon);
+    }
+
+  /* Add the integer part |exponent| to the fractional log, tracking the
+     overall sign separately. */
+  sign = exponent < 0;
+  if ( sign ) exponent = -exponent;
+  expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0;
+  if ( exponent )
+    {
+      expon_expon = 31 + norm_Xsig(&expon_accum);
+      /* Align log2(m) with the normalized integer part. */
+      shr_Xsig(&accumulator, expon_expon - expon);
+
+      /* Subtract instead of add when the two parts differ in sign. */
+      if ( sign ^ (x.sign == SIGN_NEG) )
+	negate_Xsig(&accumulator);
+      add_Xsig_Xsig(&accumulator, &expon_accum);
+    }
+  else
+    {
+      expon_expon = expon;
+      sign = x.sign;
+    }
+
+  /* Multiply by the significand of y. */
+  yaccum.lsw = 0; XSIG_LL(yaccum) = significand(y);
+  mul_Xsig_Xsig(&accumulator, &yaccum);
+
+  expon_expon += round_Xsig(&accumulator);
+
+  if ( accumulator.msw == 0 )
+    {
+      /* The product is zero.  Store it in the output register; the
+	 original code wrote through the const-qualified y instead. */
+      reg_move(&CONST_Z, result);
+    }
+  else
+    {
+      result->exp = expon_expon + y->exp + 1;
+      significand(result) = XSIG_LL(accumulator);
+      result->tag = TW_Valid; /* set the tags to Valid */
+      result->sign = sign ^ y->sign;
+    }
+
+  return;
+}
+
+
+/*--- poly_l2p1() -----------------------------------------------------------+
+ |   Base 2 logarithm by a polynomial approximation.                         |
+ |   log2(x+1)                                                               |
+ +---------------------------------------------------------------------------*/
+/*
+ * Store  y * log2(arg + 1)  in *result.
+ *
+ * Returns 0 on success.  If the magnitude of arg is out of range
+ * (arg->exp >= EXP_BIAS), y is copied to *result unchanged and the
+ * return value is 1 for a non-positive-signed arg, 0 otherwise.
+ */
+int	poly_l2p1(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result)
+{
+  Xsig                 product, ysig;
+  long int             expon;
+  char                 arg_sign = arg->sign;  /* read before result is written */
+
+
+  if ( arg->exp >= EXP_BIAS )
+    {
+      /* The magnitude of arg is far too large. */
+      reg_move(y, result);
+
+      /* A negative arg here means: trying to get the log of a
+	 negative number. */
+      return ( arg_sign != SIGN_POS ) ? 1 : 0;
+    }
+
+  log2_kernel(arg, &product, &expon);
+
+  /* Widen the significand of y to an Xsig and multiply it in. */
+  XSIG_LL(ysig) = significand(y);
+  ysig.lsw = 0;
+  mul_Xsig_Xsig(&product, &ysig);
+
+  expon += round_Xsig(&product);
+
+  result->exp = expon + y->exp + 1;
+  significand(result) = XSIG_LL(product);
+  result->tag = TW_Valid; /* set the tags to Valid */
+  result->sign = arg_sign ^ y->sign;
+
+  return 0;
+
+}
+
+
+
+
+/* Coefficients for log2_kernel(): 64 bit fixed point terms evaluated by
+   polynomial_Xsig() in Xsq, the rounded square of the transformed
+   argument. */
+#undef HIPOWER
+#define	HIPOWER	10
+static const unsigned long long logterms[HIPOWER] =
+{
+  0x2a8eca5705fc2ef0LL,
+  0xf6384ee1d01febceLL,
+  0x093bb62877cdf642LL,
+  0x006985d8a9ec439bLL,
+  0x0005212c4f55a9c8LL,
+  0x00004326a16927f0LL,
+  0x0000038d1d80a0e7LL,
+  0x0000003141cc80c6LL,
+  0x00000002b1668c9fLL,
+  0x000000002c7a46aaLL
+};
+
+/* 32 bit multiplier applied to the transformed argument itself (via
+   mul32_Xsig) to form the leading term of the series. */
+static const unsigned long leadterm = 0xb8000000;
+
+
+/*--- log2_kernel() ---------------------------------------------------------+
+ |   Base 2 logarithm by a polynomial approximation.                         |
+ |   log2(x+1)                                                               |
+ +---------------------------------------------------------------------------*/
+/*
+ * Core of the log2(1 + x) evaluation used by poly_l2() and poly_l2p1().
+ *
+ * arg:          x; its sign selects how the divisor is built.
+ * accum_result: receives the 96 bit significand of the result.
+ * expon:        receives the exponent belonging to *accum_result.
+ *
+ * The sign of the result is not returned; the callers derive it from the
+ * sign of arg themselves.
+ *
+ * Method: x is first divided by a quantity built from x with a leading
+ * 1.0 added (this appears to be x/(2+x) up to a power of two -- TODO
+ * confirm the scaling against div_Xsig()).  The result u is then fed to
+ * an odd series:  leadterm*u + u * P(u^2)  with P given by logterms[].
+ */
+static void log2_kernel(FPU_REG const *arg, Xsig *accum_result,
+			long int *expon)
+{
+  char                 sign;
+  long int             exponent, adj;
+  unsigned long long   Xsq;
+  Xsig                 accumulator, Numer, Denom, argSignif, arg_signif;
+
+  sign = arg->sign;
+
+  exponent = arg->exp - EXP_BIAS;
+  Numer.lsw = Denom.lsw = 0;
+  XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
+  if ( sign == SIGN_POS )
+    {
+      /* Divisor: shifted x with the top bit set (adds the constant). */
+      shr_Xsig(&Denom, 2 - (1 + exponent));
+      Denom.msw |= 0x80000000;
+      div_Xsig(&Numer, &Denom, &argSignif);
+    }
+  else
+    {
+      /* Divisor: constant minus shifted |x|, formed by negation. */
+      shr_Xsig(&Denom, 1 - (1 + exponent));
+      negate_Xsig(&Denom);
+      if ( Denom.msw & 0x80000000 )
+	{
+	  div_Xsig(&Numer, &Denom, &argSignif);
+	  exponent ++;
+	}
+      else
+	{
+	  /* Denom must be 1.0 */
+	  argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw;
+	  argSignif.msw = Numer.msw;
+	}
+    }
+
+#ifndef PECULIAR_486
+  /* Should check here that  |local_arg|  is within the valid range */
+  /* Placeholder only: the range test is present but takes no action. */
+  if ( exponent >= -2 )
+    {
+      if ( (exponent > -2) ||
+	  (argSignif.msw > (unsigned)0xafb0ccc0) )
+	{
+	  /* The argument is too large */
+	}
+    }
+#endif /* PECULIAR_486 */
+
+  /* Keep the un-normalized quotient for the leading term. */
+  arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif);
+  adj = norm_Xsig(&argSignif);
+
+  /* Xsq = square of the normalized quotient, de-normalized back to a
+     fixed binary point and rounded to 64 bits. */
+  accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif);
+  mul_Xsig_Xsig(&accumulator, &accumulator);
+  shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj)));
+  Xsq = XSIG_LL(accumulator);
+  if ( accumulator.lsw & 0x80000000 )
+    Xsq++;
+
+  accumulator.msw = accumulator.midw = accumulator.lsw = 0;
+  /* Do the basic fixed point polynomial evaluation */
+  polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1);
+
+  /* Multiply by the quotient and undo the normalization shift. */
+  mul_Xsig_Xsig(&accumulator, &argSignif);
+  shr_Xsig(&accumulator, 6 - adj);
+
+  /* Add the leading term; add_two_Xsig() also updates exponent. */
+  mul32_Xsig(&arg_signif, leadterm);
+  add_two_Xsig(&accumulator, &arg_signif, &exponent);
+
+  *expon = exponent + 1;
+  accum_result->lsw = accumulator.lsw;
+  accum_result->midw = accumulator.midw;
+  accum_result->msw = accumulator.msw;
+
+}
diff --git a/arch/i386/math-emu/poly_sin.c b/arch/i386/math-emu/poly_sin.c
new file mode 100644
index 000000000..03db5b6aa
--- /dev/null
+++ b/arch/i386/math-emu/poly_sin.c
@@ -0,0 +1,408 @@
+/*---------------------------------------------------------------------------+
+ |  poly_sin.c                                                               |
+ |                                                                           |
+ |  Computation of an approximation of the sin function and the cosine       |
+ |  function by a polynomial.                                                |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+#include "poly.h"
+
+
+/*
+ * Coefficient tables shared by poly_sine() and poly_cos().  Entries are
+ * 64 bit fixed point values evaluated by polynomial_Xsig() in argTo4 (the
+ * fourth power of the working argument).  In both routines the neg_terms
+ * polynomial is multiplied by the squared argument and negated before the
+ * pos_terms polynomial is accumulated on top of it.
+ *
+ * The "_l" pair is used by the lower range of poly_sine() and by the
+ * upper range of poly_cos(); the "_h" pair is used the other way round.
+ */
+#define	N_COEFF_P	4
+#define	N_COEFF_N	4
+
+static const unsigned long long pos_terms_l[N_COEFF_P] =
+{
+  0xaaaaaaaaaaaaaaabLL,
+  0x00d00d00d00cf906LL,
+  0x000006b99159a8bbLL,
+  0x000000000d7392e6LL
+};
+
+static const unsigned long long neg_terms_l[N_COEFF_N] =
+{
+  0x2222222222222167LL,
+  0x0002e3bc74aab624LL,
+  0x0000000b09229062LL,
+  0x00000000000c7973LL
+};
+
+
+
+#define	N_COEFF_PH	4
+#define	N_COEFF_NH	4
+static const unsigned long long pos_terms_h[N_COEFF_PH] =
+{
+  0x0000000000000000LL,
+  0x05b05b05b05b0406LL,
+  0x000049f93edd91a9LL,
+  0x00000000c9c9ed62LL
+};
+
+static const unsigned long long neg_terms_h[N_COEFF_NH] =
+{
+  0xaaaaaaaaaaaaaa98LL,
+  0x001a01a01a019064LL,
+  0x0000008f76c68a77LL,
+  0x0000000000d58f5eLL
+};
+
+
+/*--- poly_sine() -----------------------------------------------------------+
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+/*
+ * Store an approximation of sin(arg) in *result.
+ *
+ * arg:    the angle.  NOTE(review): the upper branch only handles
+ *         exponents of -1 and 0, so the caller presumably reduces the
+ *         argument to at most pi/2 beforehand -- confirm at the call site.
+ * result: receives significand, exp, tag (TW_Valid) and the sign of arg.
+ *
+ * Two ranges are used:
+ *   - arg <= 0.88309101259: a polynomial in arg^2 is multiplied by arg^3
+ *     and subtracted from arg.
+ *   - larger arguments: sin(arg) = cos(pi/2 - arg), followed by a
+ *     correction for the 64 bit truncation of pi/2.
+ *
+ * With PARANOID defined, a zero-tagged arg returns zero directly and a
+ * result above 1.0 raises EX_INTERNAL|0x150.
+ */
+void	poly_sine(FPU_REG const *arg, FPU_REG *result)
+{
+  int                 exponent, echange;
+  Xsig                accumulator, argSqrd, argTo4;
+  unsigned long       fix_up, adj;
+  unsigned long long  fixed_arg;
+
+
+#ifdef PARANOID
+  if ( arg->tag == TW_Zero )
+    {
+      /* Return 0.0 */
+      reg_move(&CONST_Z, result);
+      return;
+    }
+#endif /* PARANOID */
+
+  exponent = arg->exp - EXP_BIAS;
+
+  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+
+  /* Split into two ranges, for arguments below and above 1.0 */
+  /* The boundary between upper and lower is approx 0.88309101259 */
+  if ( (exponent < -1) || ((exponent == -1) && (arg->sigh <= 0xe21240aa)) )
+    {
+      /* The argument is <= 0.88309101259 */
+
+      /* argSqrd = arg^2 as a fixed point fraction, argTo4 = arg^4. */
+      argSqrd.msw = arg->sigh; argSqrd.midw = arg->sigl; argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &significand(arg));
+      shr_Xsig(&argSqrd, 2*(-1-exponent));
+      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
+      argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+
+      /* accumulator = P_pos(arg^4) - arg^2 * P_neg(arg^4) */
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
+		      N_COEFF_N-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
+		      N_COEFF_P-1);
+
+      shr_Xsig(&accumulator, 2);    /* Divide by four */
+      accumulator.msw |= 0x80000000;  /* Add 1.0 */
+
+      /* Multiply by the significand of arg three times (arg^3). */
+      mul64_Xsig(&accumulator, &significand(arg));
+      mul64_Xsig(&accumulator, &significand(arg));
+      mul64_Xsig(&accumulator, &significand(arg));
+
+      /* Divide by four, FPU_REG compatible, etc */
+      exponent = 3*exponent + EXP_BIAS;
+
+      /* The minimum exponent difference is 3 */
+      /* Align the cubic term with arg before subtracting it. */
+      shr_Xsig(&accumulator, arg->exp - exponent);
+
+      negate_Xsig(&accumulator);
+      XSIG_LL(accumulator) += significand(arg);
+
+      echange = round_Xsig(&accumulator);
+
+      result->exp = arg->exp + echange;
+    }
+  else
+    {
+      /* The argument is > 0.88309101259 */
+      /* We use sin(arg) = cos(pi/2-arg) */
+
+      fixed_arg = significand(arg);
+
+      if ( exponent == 0 )
+	{
+	  /* The argument is >= 1.0 */
+
+	  /* Put the binary point at the left. */
+	  fixed_arg <<= 1;
+	}
+      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+      /* The leading 1 is dropped: 64 bit wrap-around supplies it. */
+      fixed_arg = 0x921fb54442d18469LL - fixed_arg;
+
+      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &fixed_arg);
+
+      XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
+		      N_COEFF_NH-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
+		      N_COEFF_PH-1);
+      negate_Xsig(&accumulator);
+
+      mul64_Xsig(&accumulator, &fixed_arg);
+      mul64_Xsig(&accumulator, &fixed_arg);
+
+      shr_Xsig(&accumulator, 3);
+      negate_Xsig(&accumulator);
+
+      add_Xsig_Xsig(&accumulator, &argSqrd);
+
+      shr_Xsig(&accumulator, 1);
+
+      accumulator.lsw |= 1;  /* A zero accumulator here would cause problems */
+      negate_Xsig(&accumulator);
+
+      /* The basic computation is complete. Now fix the answer to
+	 compensate for the error due to the approximation used for
+	 pi/2
+	 */
+
+      /* This has an exponent of -65 */
+      /* These are the bits of pi/2 just below the 64 bits used above. */
+      fix_up = 0x898cc517;
+      /* The fix-up needs to be improved for larger args */
+      if ( argSqrd.msw & 0xffc00000 )
+	{
+	  /* Get about 32 bit precision in these: */
+	  mul_32_32(0x898cc517, argSqrd.msw, &adj);
+	  fix_up -= adj/6;
+	}
+      mul_32_32(fix_up, LL_MSW(fixed_arg), &fix_up);
+
+      /* 96 bit subtract of fix_up: borrow from the upper 64 bits when
+	 the low word wraps around. */
+      adj = accumulator.lsw;    /* temp save */
+      accumulator.lsw -= fix_up;
+      if ( accumulator.lsw > adj )
+	XSIG_LL(accumulator) --;
+
+      echange = round_Xsig(&accumulator);
+
+      result->exp = EXP_BIAS - 1 + echange;
+    }
+
+  significand(result) = XSIG_LL(accumulator);
+  result->tag = TW_Valid;
+  result->sign = arg->sign;
+
+#ifdef PARANOID
+  /* |sin| can never exceed 1.0; anything larger is a logic error. */
+  if ( (result->exp >= EXP_BIAS)
+      && (significand(result) > 0x8000000000000000LL) )
+    {
+      EXCEPTION(EX_INTERNAL|0x150);
+    }
+#endif /* PARANOID */
+
+}
+
+
+
+/*--- poly_cos() ------------------------------------------------------------+
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+/*
+ * Store an approximation of cos(arg) in *result.
+ *
+ * arg:    the angle; only tag, exp and the significand are read, so the
+ *         sign of arg is ignored.
+ * result: receives a positive number.  NOTE(review): sign and tag are
+ *         cleared with a single 16 bit store at &result->sign, which
+ *         presumably relies on the tag field following sign directly in
+ *         FPU_REG -- confirm against fpu_emu.h.
+ *
+ * Two ranges are used:
+ *   - arg up to about 0.687705: direct series in arg^2.
+ *   - larger arguments: evaluated from pi/2 - arg with the sine series,
+ *     followed by a correction for the 64 bit truncation of pi/2.
+ *
+ * With PARANOID defined: a zero-tagged arg returns 1.0, an argument
+ * above pi/2 raises EX_Invalid and returns a QNaN, and a result above
+ * 1.0 raises EX_INTERNAL|0x151.
+ */
+void	poly_cos(FPU_REG const *arg, FPU_REG *result)
+{
+  long int            exponent, exp2, echange;
+  Xsig                accumulator, argSqrd, fix_up, argTo4;
+  unsigned long       adj;
+  unsigned long long  fixed_arg;
+
+
+#ifdef PARANOID
+  if ( arg->tag == TW_Zero )
+    {
+      /* Return 1.0 */
+      reg_move(&CONST_1, result);
+      return;
+    }
+
+  /* The constant is the 64 bit significand of pi/2. */
+  if ( (arg->exp > EXP_BIAS)
+      || ((arg->exp == EXP_BIAS)
+	  && (significand(arg) > 0xc90fdaa22168c234LL)) )
+    {
+      EXCEPTION(EX_Invalid);
+      reg_move(&CONST_QNaN, result);
+      return;
+    }
+#endif /* PARANOID */
+
+  exponent = arg->exp - EXP_BIAS;
+
+  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
+
+  if ( (exponent < -1) || ((exponent == -1) && (arg->sigh <= 0xb00d6f54)) )
+    {
+      /* arg is < 0.687705 */
+
+      argSqrd.msw = arg->sigh; argSqrd.midw = arg->sigl; argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &significand(arg));
+
+      if ( exponent < -1 )
+	{
+	  /* shift the argument right by the required places */
+	  shr_Xsig(&argSqrd, 2*(-1-exponent));
+	}
+
+      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
+      argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
+		      N_COEFF_NH-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
+		      N_COEFF_PH-1);
+      negate_Xsig(&accumulator);
+
+      /* Multiply by arg^2 (significand twice, then fix the scaling). */
+      mul64_Xsig(&accumulator, &significand(arg));
+      mul64_Xsig(&accumulator, &significand(arg));
+      shr_Xsig(&accumulator, -2*(1+exponent));
+
+      shr_Xsig(&accumulator, 3);
+      negate_Xsig(&accumulator);
+
+      add_Xsig_Xsig(&accumulator, &argSqrd);
+
+      shr_Xsig(&accumulator, 1);
+
+      /* It doesn't matter if accumulator is all zero here, the
+	 following code will work ok */
+      negate_Xsig(&accumulator);
+
+      /* Round to 64 bits by hand. */
+      if ( accumulator.lsw & 0x80000000 )
+	XSIG_LL(accumulator) ++;
+      if ( accumulator.msw == 0 )
+	{
+	  /* The result is 1.0 */
+	  reg_move(&CONST_1, result);
+	}
+      else
+	{
+	  significand(result) = XSIG_LL(accumulator);
+      
+	  /* will be a valid positive nr with expon = -1 */
+	  *(short *)&(result->sign) = 0;
+	  result->exp = EXP_BIAS - 1;
+	}
+    }
+  else
+    {
+      fixed_arg = significand(arg);
+
+      if ( exponent == 0 )
+	{
+	  /* The argument is >= 1.0 */
+
+	  /* Put the binary point at the left. */
+	  fixed_arg <<= 1;
+	}
+      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+      /* The leading 1 is dropped: 64 bit wrap-around supplies it. */
+      fixed_arg = 0x921fb54442d18469LL - fixed_arg;
+
+      exponent = -1;
+      exp2 = -1;
+
+      /* A shift is needed here only for a narrow range of arguments,
+	 i.e. for fixed_arg approx 2^-32, but we pick up more... */
+      if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
+	{
+	  fixed_arg <<= 16;
+	  exponent -= 16;
+	  exp2 -= 16;
+	}
+
+      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
+      mul64_Xsig(&argSqrd, &fixed_arg);
+
+      if ( exponent < -1 )
+	{
+	  /* shift the argument right by the required places */
+	  shr_Xsig(&argSqrd, 2*(-1-exponent));
+	}
+
+      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
+      argTo4.lsw = argSqrd.lsw;
+      mul_Xsig_Xsig(&argTo4, &argTo4);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
+		      N_COEFF_N-1);
+      mul_Xsig_Xsig(&accumulator, &argSqrd);
+      negate_Xsig(&accumulator);
+
+      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
+		      N_COEFF_P-1);
+
+      shr_Xsig(&accumulator, 2);    /* Divide by four */
+      accumulator.msw |= 0x80000000;  /* Add 1.0 */
+
+      /* Multiply by fixed_arg three times (cube of pi/2 - arg). */
+      mul64_Xsig(&accumulator, &fixed_arg);
+      mul64_Xsig(&accumulator, &fixed_arg);
+      mul64_Xsig(&accumulator, &fixed_arg);
+
+      /* Divide by four, FPU_REG compatible, etc */
+      exponent = 3*exponent;
+
+      /* The minimum exponent difference is 3 */
+      shr_Xsig(&accumulator, exp2 - exponent);
+
+      negate_Xsig(&accumulator);
+      XSIG_LL(accumulator) += fixed_arg;
+
+      /* The basic computation is complete. Now fix the answer to
+	 compensate for the error due to the approximation used for
+	 pi/2
+	 */
+
+      /* This has an exponent of -65 */
+      /* These are the bits of pi/2 just below the 64 bits used above. */
+      XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
+      fix_up.lsw = 0;
+
+      /* The fix-up needs to be improved for larger args */
+      if ( argSqrd.msw & 0xffc00000 )
+	{
+	  /* Get about 32 bit precision in these: */
+	  mul_32_32(0x898cc517, argSqrd.msw, &adj);
+	  fix_up.msw -= adj/2;
+	  mul_32_32(0x898cc517, argTo4.msw, &adj);
+	  fix_up.msw += adj/24;
+	}
+
+      exp2 += norm_Xsig(&accumulator);
+      shr_Xsig(&accumulator, 1); /* Prevent overflow */
+      exp2++;
+      /* Align the correction with the accumulator before adding. */
+      shr_Xsig(&fix_up, 65 + exp2);
+
+      add_Xsig_Xsig(&accumulator, &fix_up);
+
+      echange = round_Xsig(&accumulator);
+
+      result->exp = exp2 + EXP_BIAS + echange;
+      *(short *)&(result->sign) = 0;      /* Is a valid positive nr */
+      significand(result) = XSIG_LL(accumulator);
+    }
+
+#ifdef PARANOID
+  /* |cos| can never exceed 1.0; anything larger is a logic error. */
+  if ( (result->exp >= EXP_BIAS)
+      && (significand(result) > 0x8000000000000000LL) )
+    {
+      EXCEPTION(EX_INTERNAL|0x151);
+    }
+#endif /* PARANOID */
+
+}
diff --git a/arch/i386/math-emu/poly_tan.c b/arch/i386/math-emu/poly_tan.c
new file mode 100644
index 000000000..d9b09e438
--- /dev/null
+++ b/arch/i386/math-emu/poly_tan.c
@@ -0,0 +1,213 @@
+/*---------------------------------------------------------------------------+
+ |  poly_tan.c                                                               |
+ |                                                                           |
+ | Compute the tan of a FPU_REG, using a polynomial approximation.           |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+#include "poly.h"
+
+
+/*
+ * Coefficient tables for poly_tan().  Entries are 64 bit fixed point
+ * values evaluated by polynomial_Xsig() in argSqSq (the fourth power of
+ * the working argument).  The "odd" tables build the numerator and the
+ * "even" tables the denominator of the ratio computed in poly_tan().
+ */
+
+/* Numerator: accumulated after the negated oddnegterm polynomial. */
+#define	HiPOWERop	3	/* odd poly, positive terms */
+static const unsigned long long oddplterm[HiPOWERop] =
+{
+  0x0000000000000000LL,
+  0x0051a1cf08fca228LL,
+  0x0000000071284ff7LL
+};
+
+/* Numerator: multiplied by argSq and negated. */
+#define	HiPOWERon	2	/* odd poly, negative terms */
+static const unsigned long long oddnegterm[HiPOWERon] =
+{
+   0x1291a9a184244e80LL,
+   0x0000583245819c21LL
+};
+
+/* Denominator: multiplied by argSq and negated. */
+#define	HiPOWERep	2	/* even poly, positive terms */
+static const unsigned long long evenplterm[HiPOWERep] =
+{
+  0x0e848884b539e888LL,
+  0x00003c7f18b887daLL
+};
+
+/* Denominator: accumulated after the negated evenplterm polynomial. */
+#define	HiPOWERen	2	/* even poly, negative terms */
+static const unsigned long long evennegterm[HiPOWERen] =
+{
+  0xf1f0200fd51569ccLL,
+  0x003afb46105c4432LL
+};
+
+/* Final 64 bit multiplier applied to the arg^3 correction term in
+   poly_tan(). */
+static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL;
+
+
+/*--- poly_tan() ------------------------------------------------------------+
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+/*
+ * Store an approximation of tan(arg) in *result.
+ *
+ * arg:    the angle.  With PARANOID defined, an arg with a non-zero sign
+ *         field is rejected through arith_invalid(result).
+ *         NOTE(review): only exponents up to 0 are handled, so the caller
+ *         presumably reduces the argument below pi/2 -- confirm.
+ * result: receives a positive number.  NOTE(review): sign and tag are
+ *         cleared with a single 16 bit store at &result->sign, which
+ *         presumably relies on the tag field following sign directly in
+ *         FPU_REG -- confirm against fpu_emu.h.
+ *
+ * Method: for arguments above (approximately) pi/4 the routine evaluates
+ * tan(pi/2 - arg), corrects for the truncation of pi/2 to 64 bits, and
+ * takes the reciprocal.  The core evaluation is
+ *     tan(x) = x + x^3 * twothirds * N(x^2) / (1 - x^2 * D(x^2) / 2)
+ * with N and D built from the odd and even coefficient tables.
+ */
+void	poly_tan(FPU_REG const *arg, FPU_REG *result)
+{
+  long int    		exponent;
+  int                   invert;
+  Xsig                  argSq, argSqSq, accumulatoro, accumulatore, accum,
+                        argSignif, fix_up;
+  unsigned long         adj;
+
+  exponent = arg->exp - EXP_BIAS;
+
+#ifdef PARANOID
+  if ( arg->sign != 0 )	/* Can't hack a number < 0.0 */
+    { arith_invalid(result); return; }  /* Need a positive number */
+#endif /* PARANOID */
+
+  /* Split the problem into two domains, smaller and larger than pi/4 */
+  if ( (exponent == 0) || ((exponent == -1) && (arg->sigh > 0xc90fdaa2)) )
+    {
+      /* The argument is greater than (approx) pi/4 */
+      invert = 1;
+      accum.lsw = 0;
+      XSIG_LL(accum) = significand(arg);
+ 
+      if ( exponent == 0 )
+	{
+	  /* The argument is >= 1.0 */
+	  /* Put the binary point at the left. */
+	  XSIG_LL(accum) <<= 1;
+	}
+      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
+      /* The leading 1 is dropped: 64 bit wrap-around supplies it. */
+      XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum);
+
+      /* accum keeps the fixed point form; argSignif is normalized. */
+      argSignif.lsw = accum.lsw;
+      XSIG_LL(argSignif) = XSIG_LL(accum);
+      exponent = -1 + norm_Xsig(&argSignif);
+    }
+  else
+    {
+      invert = 0;
+      argSignif.lsw = 0;
+      XSIG_LL(accum) = XSIG_LL(argSignif) = significand(arg);
+ 
+      if ( exponent < -1 )
+	{
+	  /* shift the argument right by the required places */
+	  if ( shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U )
+	    XSIG_LL(accum) ++;	/* round up */
+	}
+    }
+
+  /* argSq = x^2 and argSqSq = x^4 from the fixed point argument. */
+  XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw;
+  mul_Xsig_Xsig(&argSq, &argSq);
+  XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw;
+  mul_Xsig_Xsig(&argSqSq, &argSqSq);
+
+  /* Compute the negative terms for the numerator polynomial */
+  accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
+  polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1);
+  mul_Xsig_Xsig(&accumulatoro, &argSq);
+  negate_Xsig(&accumulatoro);
+  /* Add the positive terms */
+  polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1);
+
+  
+  /* Compute the positive terms for the denominator polynomial */
+  accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
+  polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1);
+  mul_Xsig_Xsig(&accumulatore, &argSq);
+  negate_Xsig(&accumulatore);
+  /* Add the negative terms */
+  polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1);
+  /* Multiply by arg^2 */
+  mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
+  mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
+  /* de-normalize and divide by 2 */
+  shr_Xsig(&accumulatore, -2*(1+exponent) + 1);
+  negate_Xsig(&accumulatore);      /* This does 1 - accumulator */
+
+  /* Now find the ratio. */
+  if ( accumulatore.msw == 0 )
+    {
+      /* accumulatoro must contain 1.0 here, (actually, 0) but it
+	 really doesn't matter what value we use because it will
+	 have negligible effect in later calculations
+	 */
+      XSIG_LL(accum) = 0x8000000000000000LL;
+      accum.lsw = 0;
+    }
+  else
+    {
+      div_Xsig(&accumulatoro, &accumulatore, &accum);
+    }
+
+  /* Multiply by 1/3 * arg^3 */
+  mul64_Xsig(&accum, &XSIG_LL(argSignif));
+  mul64_Xsig(&accum, &XSIG_LL(argSignif));
+  mul64_Xsig(&accum, &XSIG_LL(argSignif));
+  mul64_Xsig(&accum, &twothirds);
+  shr_Xsig(&accum, -2*(exponent+1));
+
+  /* tan(arg) = arg + accum */
+  add_two_Xsig(&accum, &argSignif, &exponent);
+
+  if ( invert )
+    {
+      /* We now have the value of tan(pi_2 - arg) where pi_2 is an
+	 approximation for pi/2
+	 */
+      /* The next step is to fix the answer to compensate for the
+	 error due to the approximation used for pi/2
+	 */
+
+      /* This is (approx) delta, the error in our approx for pi/2
+	 (see above). It has an exponent of -65
+	 */
+      XSIG_LL(fix_up) = 0x898cc51701b839a2LL;
+      fix_up.lsw = 0;
+
+      /* The correction is delta * (1 + tan^2); adj holds tan^2. */
+      if ( exponent == 0 )
+	adj = 0xffffffff;   /* We want approx 1.0 here, but
+			       this is close enough. */
+      else if ( exponent > -30 )
+	{
+	  adj = accum.msw >> -(exponent+1);      /* tan */
+	  mul_32_32(adj, adj, &adj);           /* tan^2 */
+	}
+      else
+	adj = 0;
+      mul_32_32(0x898cc517, adj, &adj);        /* delta * tan^2 */
+
+      fix_up.msw += adj;
+      if ( !(fix_up.msw & 0x80000000) )   /* did fix_up overflow ? */
+	{
+	  /* Yes, we need to add an msb */
+	  shr_Xsig(&fix_up, 1);
+	  fix_up.msw |= 0x80000000;
+	  shr_Xsig(&fix_up, 64 + exponent);
+	}
+      else
+	shr_Xsig(&fix_up, 65 + exponent);
+
+      add_two_Xsig(&accum, &fix_up, &exponent);
+
+      /* accum now contains tan(pi/2 - arg).
+	 Use tan(arg) = 1.0 / tan(pi/2 - arg)
+	 */
+      accumulatoro.lsw = accumulatoro.midw = 0;
+      accumulatoro.msw = 0x80000000;
+      div_Xsig(&accumulatoro, &accum, &accum);
+      exponent = - exponent - 1;
+    }
+
+  /* Transfer the result */
+  /* NOTE(review): the value returned by round_Xsig() is discarded here,
+     whereas poly_sine() and poly_cos() add it to the exponent -- confirm
+     that accum cannot carry out of the top bit at this point. */
+  round_Xsig(&accum);
+  *(short *)&(result->sign) = 0;
+  significand(result) = XSIG_LL(accum);
+  result->exp = EXP_BIAS + exponent;
+
+}
diff --git a/arch/i386/math-emu/polynom_Xsig.S b/arch/i386/math-emu/polynom_Xsig.S
new file mode 100644
index 000000000..585221f96
--- /dev/null
+++ b/arch/i386/math-emu/polynom_Xsig.S
@@ -0,0 +1,137 @@
+/*---------------------------------------------------------------------------+
+ |  polynomial_Xsig.S                                                        |
+ |                                                                           |
+ | Fixed point arithmetic polynomial evaluation.                             |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void polynomial_Xsig(Xsig *accum, unsigned long long *x,                |
+ |                        unsigned long long terms[], int n)                 |
+ |                                                                           |
+ | Computes:                                                                 |
+ | terms[0] + (terms[1] + (terms[2] + ... + (terms[n]*x) ... )*x)*x          |
+ | and adds the result to the 12 byte Xsig.                                  |
+ | n is the index of the highest-order term: terms[0]..terms[n] are read.    |
+ | x is passed by address (the code reads its two 32 bit halves via PARAM2). |
+ | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
+ | precision.                                                                |
+ |                                                                           |
+ | This function must be used carefully: most overflow of intermediate       |
+ | results is controlled, but overflow of the result is not.                 |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+	.file	"polynomial_Xsig.S"
+
+#include "fpu_asm.h"
+
+
+#define	TERM_SIZE	$8		/* size in bytes of one terms[] entry */
+#define	SUM_MS		-20(%ebp)	/* sum ms long */
+#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
+#define	SUM_LS		-28(%ebp)	/* sum ls long */
+#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
+#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
+#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
+#define OVERFLOWED      -16(%ebp)	/* addition overflow flag */
+
+.text
+	.align 2,144
+.globl _polynomial_Xsig
+_polynomial_Xsig:
+	pushl	%ebp
+	movl	%esp,%ebp
+	subl	$32,%esp		/* room for the SUM_*, ACCUM_* and OVERFLOWED locals */
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM2,%esi		/* x (address of the 64 bit multiplier) */
+	movl	PARAM3,%edi		/* terms */
+
+	movl	TERM_SIZE,%eax
+	mull	PARAM4			/* n * TERM_SIZE */
+	addl	%eax,%edi		/* %edi now addresses terms[n] */
+
+	movl	4(%edi),%edx		/* terms[n], ms long */
+	movl	%edx,SUM_MS
+	movl	(%edi),%edx		/* terms[n], ls long */
+	movl	%edx,SUM_MIDDLE
+	xor	%eax,%eax
+	movl	%eax,SUM_LS		/* the term only fills the upper 8 bytes of the sum */
+	movb	%al,OVERFLOWED		/* no carry out of the sum yet */
+
+	subl	TERM_SIZE,%edi		/* step down to the next lower term */
+	decl	PARAM4
+	js	L_accum_done		/* count went negative: no lower terms left */
+
+/*
+ * Each pass forms accum = (SUM_MS:SUM_MIDDLE) * x from four 32x32
+ * partial products, dropping the lowest 32 bits of the 128 bit product.
+ * SUM_LS takes no part in the multiplication.
+ */
+L_accum_loop:
+	xor	%eax,%eax
+	movl	%eax,ACCUM_MS
+	movl	%eax,ACCUM_MIDDLE
+
+	movl	SUM_MIDDLE,%eax
+	mull	(%esi)			/* x ls long */
+	movl	%edx,ACCUM_LS		/* only the upper half of this product is kept */
+
+	movl	SUM_MIDDLE,%eax
+	mull	4(%esi)			/* x ms long */
+	addl	%eax,ACCUM_LS
+	adcl	%edx,ACCUM_MIDDLE
+	adcl	$0,ACCUM_MS
+
+	movl	SUM_MS,%eax
+	mull	(%esi)			/* x ls long */
+	addl	%eax,ACCUM_LS
+	adcl	%edx,ACCUM_MIDDLE
+	adcl	$0,ACCUM_MS
+
+	movl	SUM_MS,%eax
+	mull	4(%esi)			/* x ms long */
+	addl	%eax,ACCUM_MIDDLE
+	adcl	%edx,ACCUM_MS
+
+	testb	$0xff,OVERFLOWED	/* did the previous sum carry out of SUM_MS ? */
+	jz	L_no_overflow
+
+	/* Yes: add x once more at the MIDDLE:MS position to make up for
+	   the carry that was lost from the sum */
+	movl	(%esi),%eax
+	addl	%eax,ACCUM_MIDDLE
+	movl	4(%esi),%eax
+	adcl	%eax,ACCUM_MS		/* This could overflow too */
+
+L_no_overflow:
+
+/*
+ * Now put the sum of next term and the accumulator
+ * into the sum register
+ *
+ * NOTE(review): the term's ls long at (%edi) is added at both the LS
+ * and the MIDDLE position below, although the first term was loaded
+ * into MIDDLE:MS only -- confirm that this is intended.
+ */
+	movl	ACCUM_LS,%eax
+	addl	(%edi),%eax		/* term ls long */
+	movl	%eax,SUM_LS
+	movl	ACCUM_MIDDLE,%eax
+	adcl	(%edi),%eax		/* term ls long */
+	movl	%eax,SUM_MIDDLE
+	movl	ACCUM_MS,%eax
+	adcl	4(%edi),%eax		/* term ms long */
+	movl	%eax,SUM_MS
+	sbbb	%al,%al			/* %al = 0xff if the last adcl carried, else 0 */
+	movb	%al,OVERFLOWED		/* Used in the next iteration */
+
+	subl	TERM_SIZE,%edi		/* next lower term */
+	decl	PARAM4
+	jns	L_accum_loop		/* repeat until the count goes negative */
+
+L_accum_done:
+	movl	PARAM1,%edi		/* accum */
+	movl	SUM_LS,%eax		/* add the 12 byte sum into *accum, */
+	addl	%eax,(%edi)		/* lowest long first, carrying upwards */
+	movl	SUM_MIDDLE,%eax
+	adcl	%eax,4(%edi)
+	movl	SUM_MS,%eax
+	adcl	%eax,8(%edi)		/* a carry out of here is not recorded */
+
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
diff --git a/arch/i386/math-emu/reg_add_sub.c b/arch/i386/math-emu/reg_add_sub.c
new file mode 100644
index 000000000..d70889b40
--- /dev/null
+++ b/arch/i386/math-emu/reg_add_sub.c
@@ -0,0 +1,318 @@
+/*---------------------------------------------------------------------------+
+ |  reg_add_sub.c                                                            |
+ |                                                                           |
+ | Functions to add or subtract two registers and put the result in a third. |
+ |                                                                           |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | For each function, the destination may be any FPU_REG, including one of   |
+ | the source FPU_REGs.                                                      |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+#include "fpu_system.h"
+
+
+/* Add two registers.  (a+b) -> dest
+ *
+ * a, b       operands; both are only read.
+ * dest       receives the result; it may be one of the operands.
+ * control_w  control word: handed to reg_u_add()/reg_u_sub(), and its
+ *            rounding-control field (CW_RC) selects the sign of an
+ *            exact zero result.
+ *
+ * Returns 0 when a result has been stored in dest.  Returns 1 when
+ * reg_u_add()/reg_u_sub() or denormal_operand() reported a non-zero
+ * status (dest->sign is put back after a failed reg_u_* call) and for
+ * tag combinations which the code does not recognise.  For NaN operands,
+ * and for the sum of two Infinities of opposite sign, the value returned
+ * by real_2op_NaN() or arith_invalid() is passed on.
+ */
+int reg_add(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest, int control_w)
+{
+  char saved_sign = dest->sign;
+  int diff;
+
+  if ( !(a->tag | b->tag) )
+    {
+      /* Both registers are valid */
+      if (!(a->sign ^ b->sign))
+	{
+	  /* signs are the same */
+	  dest->sign = a->sign;
+	  if ( reg_u_add(a, b, dest, control_w) )
+	    {
+	      dest->sign = saved_sign;
+	      return 1;
+	    }
+	  return 0;
+	}
+
+      /* The signs are different, so do a subtraction */
+      /* diff orders the magnitudes: > 0 if |a| > |b|, < 0 if |a| < |b| */
+      diff = a->exp - b->exp;
+      if (!diff)
+	{
+	  diff = a->sigh - b->sigh;  /* Works only if ms bits are identical */
+	  if (!diff)
+	    {
+	      diff = a->sigl > b->sigl;
+	      if (!diff)
+		diff = -(a->sigl < b->sigl);
+	    }
+	}
+
+      if (diff > 0)
+	{
+	  dest->sign = a->sign;
+	  if ( reg_u_sub(a, b, dest, control_w) )
+	    {
+	      dest->sign = saved_sign;
+	      return 1;
+	    }
+	}
+      else if ( diff == 0 )
+	{
+	  /* Equal magnitudes, so checking b alone covers both operands */
+#ifdef DENORM_OPERAND
+	  if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
+	      denormal_operand() )
+	    return 1;
+#endif /* DENORM_OPERAND */
+	  reg_move(&CONST_Z, dest);
+	  /* sign depends upon rounding mode */
+	  dest->sign = ((control_w & CW_RC) != RC_DOWN)
+	    ? SIGN_POS : SIGN_NEG;
+	}
+      else
+	{
+	  dest->sign = b->sign;
+	  if ( reg_u_sub(b, a, dest, control_w) )
+	    {
+	      dest->sign = saved_sign;
+	      return 1;
+	    }
+	}
+      return 0;
+    }
+  else
+    {
+      if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) )
+	{ return real_2op_NaN(a, b, dest); }
+      else if (a->tag == TW_Zero)
+	{
+	  if (b->tag == TW_Zero)
+	    {
+	      char different_signs = a->sign ^ b->sign;
+	      /* Both are zero, result will be zero. */
+	      reg_move(a, dest);
+	      if (different_signs)
+		{
+		  /* Signs are different. */
+		  /* Sign of answer depends upon rounding mode. */
+		  dest->sign = ((control_w & CW_RC) != RC_DOWN)
+		    ? SIGN_POS : SIGN_NEG;
+		}
+	    }
+	  else
+	    {
+	      /* 0 + b gives b */
+#ifdef DENORM_OPERAND
+	      if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
+		  denormal_operand() )
+		return 1;
+#endif /* DENORM_OPERAND */
+	      reg_move(b, dest);
+	    }
+	  return 0;
+	}
+      else if (b->tag == TW_Zero)
+	{
+	  /* a + 0 gives a */
+#ifdef DENORM_OPERAND
+	  if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
+	      denormal_operand() )
+	    return 1;
+#endif /* DENORM_OPERAND */
+	  reg_move(a, dest); return 0;
+	}
+      else if (a->tag == TW_Infinity)
+	{
+	  if (b->tag != TW_Infinity)
+	    {
+#ifdef DENORM_OPERAND
+	      if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
+		  denormal_operand() )
+		return 1;
+#endif /* DENORM_OPERAND */
+	      reg_move(a, dest); return 0;
+	    }
+	  if (a->sign == b->sign)
+	    {
+	      /* They are both + or - infinity */
+	      reg_move(a, dest); return 0;
+	    }
+	  return arith_invalid(dest);	/* Infinity-Infinity is undefined. */
+	}
+      else if (b->tag == TW_Infinity)
+	{
+#ifdef DENORM_OPERAND
+	  if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
+	      denormal_operand() )
+	    return 1;
+#endif /* DENORM_OPERAND */
+	  reg_move(b, dest); return 0;
+	}
+    }
+  /* Only reached for a tag combination not handled above */
+#ifdef PARANOID
+  EXCEPTION(EX_INTERNAL|0x101);
+#endif
+  return 1;
+}
+
+
+/* Subtract b from a.  (a-b) -> dest
+ *
+ * a, b       operands; both are only read.
+ * dest       receives the result; it may be one of the operands.
+ * control_w  control word: handed to reg_u_add()/reg_u_sub(), and its
+ *            rounding-control field (CW_RC) selects the sign of an
+ *            exact zero result.
+ *
+ * Returns 0 when a result has been stored in dest.  Returns 1 when
+ * reg_u_add()/reg_u_sub() or denormal_operand() reported a non-zero
+ * status (dest->sign is put back after a failed reg_u_* call) and for
+ * tag combinations which the code does not recognise.  For NaN operands,
+ * and for the difference of two Infinities of the same sign, the value
+ * returned by real_2op_NaN() or arith_invalid() is passed on.
+ */
+int reg_sub(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest, int control_w)
+{
+  char saved_sign = dest->sign;
+  int diff;
+
+  if ( !(a->tag | b->tag) )
+    {
+      /* Both registers are valid */
+      /* diff orders the magnitudes: > 0 if |a| > |b|, < 0 if |a| < |b| */
+      diff = a->exp - b->exp;
+      if (!diff)
+	{
+	  diff = a->sigh - b->sigh;  /* Works only if ms bits are identical */
+	  if (!diff)
+	    {
+	      diff = a->sigl > b->sigl;
+	      if (!diff)
+		diff = -(a->sigl < b->sigl);
+	    }
+	}
+
+      /* The case labels rely on the sign field holding 0 for positive
+	 and 1 for negative */
+      switch (a->sign*2 + b->sign)
+	{
+	case 0: /* P - P */
+	case 3: /* N - N */
+	  if (diff > 0)
+	    {
+	      /* |a| > |b| */
+	      dest->sign = a->sign;
+	      if ( reg_u_sub(a, b, dest, control_w) )
+		{
+		  dest->sign = saved_sign;
+		  return 1;
+		}
+	      return 0;
+	    }
+	  else if ( diff == 0 )
+	    {
+	      /* Equal magnitudes, so checking b alone covers both operands */
+#ifdef DENORM_OPERAND
+	      if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
+		  denormal_operand() )
+		return 1;
+#endif /* DENORM_OPERAND */
+	      reg_move(&CONST_Z, dest);
+	      /* sign depends upon rounding mode */
+	      dest->sign = ((control_w & CW_RC) != RC_DOWN)
+		? SIGN_POS : SIGN_NEG;
+	    }
+	  else
+	    {
+	      /* |a| < |b|: the result takes the opposite sign to a */
+	      dest->sign = a->sign ^ SIGN_POS^SIGN_NEG;
+	      if ( reg_u_sub(b, a, dest, control_w) )
+		{
+		  dest->sign = saved_sign;
+		  return 1;
+		}
+	    }
+	  break;
+	case 1: /* P - N */
+	  dest->sign = SIGN_POS;
+	  if ( reg_u_add(a, b, dest, control_w) )
+	    {
+	      dest->sign = saved_sign;
+	      return 1;
+	    }
+	  break;
+	case 2: /* N - P */
+	  dest->sign = SIGN_NEG;
+	  if ( reg_u_add(a, b, dest, control_w) )
+	    {
+	      dest->sign = saved_sign;
+	      return 1;
+	    }
+	  break;
+	}
+      return 0;
+    }
+  else
+    {
+      /* Note that the operands are handed over as (b, a) here */
+      if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) )
+	{ return real_2op_NaN(b, a, dest); }
+      else if (b->tag == TW_Zero)
+	{
+	  if (a->tag == TW_Zero)
+	    {
+	      char same_signs = !(a->sign ^ b->sign);
+	      /* Both are zero, result will be zero. */
+	      reg_move(a, dest); /* Answer for different signs. */
+	      if (same_signs)
+		{
+		  /* Sign depends upon rounding mode */
+		  dest->sign = ((control_w & CW_RC) != RC_DOWN)
+		    ? SIGN_POS : SIGN_NEG;
+		}
+	    }
+	  else
+	    {
+	      /* a - 0 gives a */
+#ifdef DENORM_OPERAND
+	      if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
+		  denormal_operand() )
+		return 1;
+#endif /* DENORM_OPERAND */
+	      reg_move(a, dest);
+	    }
+	  return 0;
+	}
+      else if (a->tag == TW_Zero)
+	{
+	  /* 0 - b gives b with its sign inverted */
+#ifdef DENORM_OPERAND
+	  if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
+	      denormal_operand() )
+	    return 1;
+#endif /* DENORM_OPERAND */
+	  reg_move(b, dest);
+	  dest->sign ^= SIGN_POS^SIGN_NEG;
+	  return 0;
+	}
+      else if (a->tag == TW_Infinity)
+	{
+	  if (b->tag != TW_Infinity)
+	    {
+#ifdef DENORM_OPERAND
+	      if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
+		  denormal_operand() )
+		return 1;
+#endif /* DENORM_OPERAND */
+	      reg_move(a, dest); return 0;
+	    }
+	  /* Both args are Infinity */
+	  if (a->sign == b->sign)
+	    {
+	      /* Infinity-Infinity is undefined. */
+	      return arith_invalid(dest);
+	    }
+	  reg_move(a, dest);
+	  return 0;
+	}
+      else if (b->tag == TW_Infinity)
+	{
+	  /* a - Infinity gives Infinity with its sign inverted */
+#ifdef DENORM_OPERAND
+	  if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
+	      denormal_operand() )
+	    return 1;
+#endif /* DENORM_OPERAND */
+	  reg_move(b, dest);
+	  dest->sign ^= SIGN_POS^SIGN_NEG;
+	  return 0;
+	}
+    }
+  /* Only reached for a tag combination not handled above */
+#ifdef PARANOID
+  EXCEPTION(EX_INTERNAL|0x110);
+#endif
+  return 1;
+}
+
diff --git a/arch/i386/math-emu/reg_compare.c b/arch/i386/math-emu/reg_compare.c
new file mode 100644
index 000000000..eb4a1fa99
--- /dev/null
+++ b/arch/i386/math-emu/reg_compare.c
@@ -0,0 +1,378 @@
+/*---------------------------------------------------------------------------+
+ |  reg_compare.c                                                            |
+ |                                                                           |
+ | Compare two floating point registers                                      |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | compare() is the core FPU_REG comparison function                         |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+#include "status_w.h"
+
+
+/* Compare st(0) (operand A) with *b (operand B).
+ *
+ * Returns one of COMP_A_lt_B, COMP_A_eq_B, COMP_A_gt_B or COMP_No_Comp.
+ * When an operand is a NaN, COMP_NaN is or-ed in, together with
+ * COMP_SNaN if a NaN operand has bit 0x40000000 of sigh clear.
+ * When DENORM_OPERAND is defined, COMP_Denormal is or-ed in if a
+ * TW_Valid operand taking part has exp <= EXP_UNDER.
+ *
+ * EX_Invalid is raised here only for tag combinations which none of
+ * the special cases recognises; callers decide what to do about NaNs.
+ */
+int compare(FPU_REG const *b)
+{
+  int diff;
+  char	       st0_tag;
+  FPU_REG      *st0_ptr;
+
+  st0_ptr = &st(0);
+  st0_tag = st0_ptr->tag;
+
+  if ( st0_tag | b->tag )
+    {
+      /* At least one operand is not TW_Valid */
+      if ( st0_tag == TW_Zero )
+	{
+	  if ( b->tag == TW_Zero ) return COMP_A_eq_B;
+	  if ( b->tag == TW_Valid )
+	    {
+	      /* Zero against a valid number: the sign of b decides */
+	      return ((b->sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+#ifdef DENORM_OPERAND
+		| ((b->exp <= EXP_UNDER) ?
+		   COMP_Denormal : 0)
+#endif /* DENORM_OPERAND */
+		  ;
+	    }
+	}
+      else if ( b->tag == TW_Zero )
+	{
+	  if ( st0_tag == TW_Valid )
+	    {
+	      /* A valid number against zero: the sign of st(0) decides */
+	      return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B
+		      : COMP_A_lt_B)
+#ifdef DENORM_OPERAND
+		| ((st0_ptr->exp <= EXP_UNDER )
+		   ? COMP_Denormal : 0 )
+#endif /* DENORM_OPERAND */
+		  ;
+	    }
+	}
+
+      if ( st0_tag == TW_Infinity )
+	{
+	  if ( (b->tag == TW_Valid) || (b->tag == TW_Zero) )
+	    {
+	      return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B
+		      : COMP_A_lt_B)
+#ifdef DENORM_OPERAND
+		| (((b->tag == TW_Valid) && (b->exp <= EXP_UNDER)) ?
+		   COMP_Denormal : 0 )
+#endif /* DENORM_OPERAND */
+		  ;
+	    }
+	  else if ( b->tag == TW_Infinity )
+	    {
+	      /* The 80486 book says that infinities can be equal! */
+	      return (st0_ptr->sign == b->sign) ? COMP_A_eq_B :
+		((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
+	    }
+	  /* Fall through to the NaN code */
+	}
+      else if ( b->tag == TW_Infinity )
+	{
+	  if ( (st0_tag == TW_Valid) || (st0_tag == TW_Zero) )
+	    {
+	      return ((b->sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+#ifdef DENORM_OPERAND
+		| (((st0_tag == TW_Valid)
+		    && (st0_ptr->exp <= EXP_UNDER)) ?
+		   COMP_Denormal : 0)
+#endif /* DENORM_OPERAND */
+		  ;
+	    }
+	  /* Fall through to the NaN code */
+	}
+
+      /* The only possibility now should be that one of the arguments
+	 is a NaN */
+      if ( (st0_tag == TW_NaN) || (b->tag == TW_NaN) )
+	{
+	  if ( ((st0_tag == TW_NaN) && !(st0_ptr->sigh & 0x40000000))
+	      || ((b->tag == TW_NaN) && !(b->sigh & 0x40000000)) )
+	    /* At least one arg is a signaling NaN */
+	    return COMP_No_Comp | COMP_SNaN | COMP_NaN;
+	  else
+	    /* Neither is a signaling NaN */
+	    return COMP_No_Comp | COMP_NaN;
+	}
+
+      /* Unrecognised tags: report it, then carry on into the code for
+	 valid numbers below */
+      EXCEPTION(EX_Invalid);
+    }
+
+#ifdef PARANOID
+  /* Valid numbers are expected to have the ms bit of sigh set */
+  if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
+  if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
+#endif /* PARANOID */
+
+
+  if (st0_ptr->sign != b->sign)
+    {
+      return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+#ifdef DENORM_OPERAND
+	|
+	  ( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ?
+	   COMP_Denormal : 0)
+#endif /* DENORM_OPERAND */
+	    ;
+    }
+
+  /* Same sign: order the magnitudes.  diff > 0 if |st(0)| > |b| */
+  diff = st0_ptr->exp - b->exp;
+  if ( diff == 0 )
+    {
+      diff = st0_ptr->sigh - b->sigh;  /* Works only if ms bits are
+					      identical */
+      if ( diff == 0 )
+	{
+	diff = st0_ptr->sigl > b->sigl;
+	if ( diff == 0 )
+	  diff = -(st0_ptr->sigl < b->sigl);
+	}
+    }
+
+  if ( diff > 0 )
+    {
+      return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
+#ifdef DENORM_OPERAND
+	|
+	  ( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ?
+	   COMP_Denormal : 0)
+#endif /* DENORM_OPERAND */
+	    ;
+    }
+  if ( diff < 0 )
+    {
+      return ((st0_ptr->sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
+#ifdef DENORM_OPERAND
+	|
+	  ( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ?
+	   COMP_Denormal : 0)
+#endif /* DENORM_OPERAND */
+	    ;
+    }
+
+  return COMP_A_eq_B
+#ifdef DENORM_OPERAND
+    |
+      ( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ?
+       COMP_Denormal : 0)
+#endif /* DENORM_OPERAND */
+	;
+
+}
+
+
+/* Compare st(0) with *loaded_data and set the condition codes.
+ * This function requires that st(0) is not empty.
+ *
+ * EX_Invalid is raised when compare() reports a NaN operand; C3, C2
+ * and C0 are then all set.
+ * Returns the value of denormal_operand() if compare() flagged a
+ * denormal operand, otherwise 0.
+ */
+int compare_st_data(FPU_REG const *loaded_data)
+{
+  int f, c;
+
+  c = compare(loaded_data);
+
+  if (c & COMP_NaN)
+    {
+      EXCEPTION(EX_Invalid);
+      f = SW_C3 | SW_C2 | SW_C0;
+    }
+  else
+    switch (c & 7)
+      {
+      case COMP_A_lt_B:
+	f = SW_C0;
+	break;
+      case COMP_A_eq_B:
+	f = SW_C3;
+	break;
+      case COMP_A_gt_B:
+	f = 0;
+	break;
+      case COMP_No_Comp:
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+      default:
+	/* Not expected.  f is given a value even without PARANOID so
+	   that setcc() is never handed an uninitialised value. */
+#ifdef PARANOID
+	EXCEPTION(EX_INTERNAL|0x121);
+#endif /* PARANOID */
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+      }
+  setcc(f);
+  if (c & COMP_Denormal)
+    {
+      return denormal_operand();
+    }
+  return 0;
+}
+
+
+/* Compare st(0) with st(nr) and set the condition codes.
+ *
+ * If either register is empty EX_StackUnder is raised, and if
+ * compare() reports a NaN operand EX_Invalid is raised.  In both cases
+ * C3, C2 and C0 are set and the return value is non-zero exactly when
+ * CW_Invalid is clear in control_word.
+ * Otherwise returns the value of denormal_operand() if compare()
+ * flagged a denormal operand, else 0.
+ */
+static int compare_st_st(int nr)
+{
+  int f, c;
+
+  if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
+    {
+      setcc(SW_C3 | SW_C2 | SW_C0);
+      /* Stack fault */
+      EXCEPTION(EX_StackUnder);
+      return !(control_word & CW_Invalid);
+    }
+
+  c = compare(&st(nr));
+  if (c & COMP_NaN)
+    {
+      setcc(SW_C3 | SW_C2 | SW_C0);
+      EXCEPTION(EX_Invalid);
+      return !(control_word & CW_Invalid);
+    }
+  else
+    switch (c & 7)
+      {
+      case COMP_A_lt_B:
+	f = SW_C0;
+	break;
+      case COMP_A_eq_B:
+	f = SW_C3;
+	break;
+      case COMP_A_gt_B:
+	f = 0;
+	break;
+      case COMP_No_Comp:
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+      default:
+	/* Not expected.  f is given a value even without PARANOID so
+	   that setcc() is never handed an uninitialised value. */
+#ifdef PARANOID
+	EXCEPTION(EX_INTERNAL|0x122);
+#endif /* PARANOID */
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+      }
+  setcc(f);
+  if (c & COMP_Denormal)
+    {
+      return denormal_operand();
+    }
+  return 0;
+}
+
+
+/* Unordered compare of st(0) with st(nr); sets the condition codes.
+ *
+ * If either register is empty EX_StackUnder is raised, C3, C2 and C0
+ * are set, and the return value is non-zero exactly when CW_Invalid is
+ * clear in control_word.
+ * If compare() reports a NaN operand, C3, C2 and C0 are set;
+ * EX_Invalid is raised only when COMP_SNaN is also reported (same
+ * return rule as above), otherwise 0 is returned.
+ * In all other cases returns the value of denormal_operand() if
+ * compare() flagged a denormal operand, else 0.
+ */
+static int compare_u_st_st(int nr)
+{
+  int f, c;
+
+  if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
+    {
+      setcc(SW_C3 | SW_C2 | SW_C0);
+      /* Stack fault */
+      EXCEPTION(EX_StackUnder);
+      return !(control_word & CW_Invalid);
+    }
+
+  c = compare(&st(nr));
+  if (c & COMP_NaN)
+    {
+      setcc(SW_C3 | SW_C2 | SW_C0);
+      if (c & COMP_SNaN)       /* This is the only difference between
+				  un-ordered and ordinary comparisons */
+	{
+	  EXCEPTION(EX_Invalid);
+	  return !(control_word & CW_Invalid);
+	}
+      return 0;
+    }
+  else
+    switch (c & 7)
+      {
+      case COMP_A_lt_B:
+	f = SW_C0;
+	break;
+      case COMP_A_eq_B:
+	f = SW_C3;
+	break;
+      case COMP_A_gt_B:
+	f = 0;
+	break;
+      case COMP_No_Comp:
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+      default:
+	/* Not expected.  f is given a value even without PARANOID so
+	   that setcc() is never handed an uninitialised value. */
+#ifdef PARANOID
+	EXCEPTION(EX_INTERNAL|0x123);
+#endif /* PARANOID */
+	f = SW_C3 | SW_C2 | SW_C0;
+	break;
+      }
+  setcc(f);
+  if (c & COMP_Denormal)
+    {
+      return denormal_operand();
+    }
+  return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
+void fcom_st()
+{
+  /* fcom st(i): nothing is popped, so the status returned by the
+     comparison is not needed here */
+  (void) compare_st_st(FPU_rm);
+}
+
+
+void fcompst()
+{
+  /* fcomp st(i): pop only if the comparison returned zero */
+  if ( compare_st_st(FPU_rm) == 0 )
+    {
+      pop();
+    }
+}
+
+
+void fcompp()
+{
+  /* fcompp: only the encoding with rm == 1 is accepted; both operands
+     are popped if the comparison returned zero */
+  if ( FPU_rm == 1 )
+    {
+      if ( !compare_st_st(1) )
+	poppop();
+    }
+  else
+    FPU_illegal();
+}
+
+
+void fucom_()
+{
+  /* fucom st(i): nothing is popped, so the status returned by the
+     comparison is not needed here */
+  (void) compare_u_st_st(FPU_rm);
+}
+
+
+void fucomp()
+{
+  /* fucomp st(i): pop only if the comparison returned zero */
+  if ( compare_u_st_st(FPU_rm) == 0 )
+    {
+      pop();
+    }
+}
+
+
+void fucompp()
+{
+  /* fucompp: only the encoding with rm == 1 is accepted */
+  if ( FPU_rm != 1 )
+    {
+      FPU_illegal();
+      return;
+    }
+
+  /* Both operands are popped if the comparison returned zero */
+  if ( !compare_u_st_st(1) )
+    poppop();
+}
diff --git a/arch/i386/math-emu/reg_constant.c b/arch/i386/math-emu/reg_constant.c
new file mode 100644
index 000000000..c1981ce24
--- /dev/null
+++ b/arch/i386/math-emu/reg_constant.c
@@ -0,0 +1,116 @@
+/*---------------------------------------------------------------------------+
+ |  reg_constant.c                                                           |
+ |                                                                           |
+ | All of the constant FPU_REGs                                              |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_system.h"
+#include "fpu_emu.h"
+#include "status_w.h"
+#include "reg_constant.h"
+
+/*
+ * Each initializer below lists five values.  Going by the names used,
+ * they are the sign, the tag, the exponent and two significand words.
+ * NOTE(review): the significand words look like low word first, high
+ * word second (CONST_1 ends in 0x80000000) -- confirm against the
+ * FPU_REG declaration in fpu_emu.h.  The values quoted in the comments
+ * on each constant assume that reading.
+ */
+FPU_REG const CONST_1    = { SIGN_POS, TW_Valid, EXP_BIAS,	/* 1.0 */
+			    0x00000000, 0x80000000 };
+FPU_REG const CONST_2    = { SIGN_POS, TW_Valid, EXP_BIAS+1,	/* 2.0 */
+			    0x00000000, 0x80000000 };
+FPU_REG const CONST_HALF = { SIGN_POS, TW_Valid, EXP_BIAS-1,	/* 0.5 */
+			    0x00000000, 0x80000000 };
+FPU_REG const CONST_L2T  = { SIGN_POS, TW_Valid, EXP_BIAS+1,	/* log2(10) */
+			    0xcd1b8afe, 0xd49a784b };
+FPU_REG const CONST_L2E  = { SIGN_POS, TW_Valid, EXP_BIAS,	/* log2(e) */
+			    0x5c17f0bc, 0xb8aa3b29 };
+FPU_REG const CONST_PI   = { SIGN_POS, TW_Valid, EXP_BIAS+1,	/* pi */
+			    0x2168c235, 0xc90fdaa2 };
+FPU_REG const CONST_PI2  = { SIGN_POS, TW_Valid, EXP_BIAS,	/* pi/2 */
+			    0x2168c235, 0xc90fdaa2 };
+FPU_REG const CONST_PI4  = { SIGN_POS, TW_Valid, EXP_BIAS-1,	/* pi/4 */
+			    0x2168c235, 0xc90fdaa2 };
+FPU_REG const CONST_LG2  = { SIGN_POS, TW_Valid, EXP_BIAS-2,	/* log10(2) */
+			    0xfbcff799, 0x9a209a84 };
+FPU_REG const CONST_LN2  = { SIGN_POS, TW_Valid, EXP_BIAS-1,	/* ln(2) */
+			    0xd1cf79ac, 0xb17217f7 };
+
+/* Extra bits to take pi/2 to more than 128 bits precision. */
+/* Note that this correction is negative (SIGN_NEG). */
+FPU_REG const CONST_PI2extra = { SIGN_NEG, TW_Valid, EXP_BIAS-66,
+			    0xfc8f8cbb, 0xece675d1 };
+
+/* Only the sign (and tag) is used in internal zeroes */
+FPU_REG const CONST_Z    = { SIGN_POS, TW_Zero, EXP_UNDER, 0x0, 0x0 };
+
+/* Only the sign and significand (and tag) are used in internal NaNs */
+/* The 80486 never generates one of these 
+FPU_REG const CONST_SNAN = { SIGN_POS, TW_NaN, EXP_OVER, 0x00000001, 0x80000000 };
+ */
+/* This is the real indefinite QNaN */
+FPU_REG const CONST_QNaN = { SIGN_NEG, TW_NaN, EXP_OVER, 0x00000000, 0xC0000000 };
+
+/* Only the sign (and tag) is used in internal infinities */
+FPU_REG const CONST_INF  = { SIGN_POS, TW_Infinity, EXP_OVER, 0x00000000, 0x80000000 };
+
+
+
+/* Push a copy of the constant *c onto the register stack and clear C1.
+ * If STACK_OVERFLOW is true, stack_overflow() is called instead and
+ * nothing is pushed.
+ */
+static void fld_const(FPU_REG const *c)
+{
+  /* NOTE(review): st_new_ptr is never assigned in this function, so
+     the STACK_OVERFLOW macro presumably sets it -- confirm against its
+     definition before renaming this variable. */
+  FPU_REG *st_new_ptr;
+
+  if ( !STACK_OVERFLOW )
+    {
+      push();
+      reg_move(c, st_new_ptr);
+      clear_C1();
+      return;
+    }
+
+  stack_overflow();
+}
+
+
+/* One loader per constant; each pushes its constant via fld_const(). */
+
+static void fld1(void)   { fld_const(&CONST_1); }
+
+static void fldl2t(void) { fld_const(&CONST_L2T); }
+
+static void fldl2e(void) { fld_const(&CONST_L2E); }
+
+static void fldpi(void)  { fld_const(&CONST_PI); }
+
+static void fldlg2(void) { fld_const(&CONST_LG2); }
+
+static void fldln2(void) { fld_const(&CONST_LN2); }
+
+static void fldz(void)   { fld_const(&CONST_Z); }
+
+/* Handlers indexed by FPU_rm; the last slot has no constant */
+static FUNC constants_table[] = {
+  fld1,
+  fldl2t,
+  fldl2e,
+  fldpi,
+  fldlg2,
+  fldln2,
+  fldz,
+  FPU_illegal
+};
+
+/* Run the handler which constants_table holds for the current FPU_rm */
+void fconst(void)
+{
+  FUNC handler = constants_table[FPU_rm];
+
+  handler();
+}
diff --git a/arch/i386/math-emu/reg_constant.h b/arch/i386/math-emu/reg_constant.h
new file mode 100644
index 000000000..b7db97e34
--- /dev/null
+++ b/arch/i386/math-emu/reg_constant.h
@@ -0,0 +1,31 @@
+/*---------------------------------------------------------------------------+
+ |  reg_constant.h                                                           |
+ |                                                                           |
+ | Copyright (C) 1992    W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _REG_CONSTANT_H_
+#define _REG_CONSTANT_H_
+
+#include "fpu_emu.h"
+
+/* Declarations of the constant FPU_REGs for use by other source files */
+extern FPU_REG const CONST_1;
+extern FPU_REG const CONST_2;
+extern FPU_REG const CONST_HALF;
+extern FPU_REG const CONST_L2T;
+extern FPU_REG const CONST_L2E;
+extern FPU_REG const CONST_PI;
+extern FPU_REG const CONST_PI2;
+extern FPU_REG const CONST_PI2extra;
+extern FPU_REG const CONST_PI4;
+extern FPU_REG const CONST_LG2;
+extern FPU_REG const CONST_LN2;
+extern FPU_REG const CONST_Z;
+/* NOTE(review): no definition of CONST_PINF or CONST_MINF appears beside
+   the other constants in reg_constant.c -- confirm that one exists
+   before relying on these two declarations. */
+extern FPU_REG const CONST_PINF;
+extern FPU_REG const CONST_INF;
+extern FPU_REG const CONST_MINF;
+extern FPU_REG const CONST_QNaN;
+
+#endif /* _REG_CONSTANT_H_ */
diff --git a/arch/i386/math-emu/reg_div.S b/arch/i386/math-emu/reg_div.S
new file mode 100644
index 000000000..2fbc5f7c4
--- /dev/null
+++ b/arch/i386/math-emu/reg_div.S
@@ -0,0 +1,251 @@
+	.file	"reg_div.S"
+/*---------------------------------------------------------------------------+
+ |  reg_div.S                                                                |
+ |                                                                           |
+ | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void reg_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest,                     |
+ |                                    unsigned int control_word)             |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "fpu_asm.h"
+
+/*
+ * Register use: %esi = a (PARAM1), %ebx = b (PARAM2), %edi = dest (PARAM3).
+ *
+ * When both operands are TW_Valid the sign, tag and exponent of dest are
+ * set up here and control passes to divide_kernel with a jmp, so that
+ * path does not come back through LDiv_exit.  Every other tag
+ * combination is dealt with below L_div_special.
+ *
+ * %eax is cleared on the paths which store a result themselves; the
+ * paths which call a helper leave whatever the helper put in %eax.
+ */
+
+.text
+	.align 2
+
+.globl	_reg_div
+_reg_div:
+	pushl	%ebp
+	movl	%esp,%ebp
+#ifndef NON_REENTRANT_FPU
+	subl	$28,%esp	/* Needed by divide_kernel */
+#endif /* NON_REENTRANT_FPU */
+
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi
+	movl	PARAM2,%ebx
+	movl	PARAM3,%edi
+
+	movb	TAG(%esi),%al
+	orb	TAG(%ebx),%al
+
+	jne	L_div_special		/* Not (both numbers TW_Valid) */
+
+#ifdef DENORM_OPERAND
+/* Check for denormals */
+	cmpl	EXP_UNDER,EXP(%esi)
+	jg	xL_arg1_not_denormal
+
+	call	_denormal_operand
+	orl	%eax,%eax
+	jnz	fpu_Arith_exit
+
+xL_arg1_not_denormal:
+	cmpl	EXP_UNDER,EXP(%ebx)
+	jg	xL_arg2_not_denormal
+
+	call	_denormal_operand
+	orl	%eax,%eax
+	jnz	fpu_Arith_exit
+
+xL_arg2_not_denormal:
+#endif /* DENORM_OPERAND */
+
+/* Both arguments are TW_Valid */
+	movb	TW_Valid,TAG(%edi)
+
+	movb	SIGN(%esi),%cl
+	cmpb	%cl,SIGN(%ebx)
+	setne	(%edi)	      /* Set the sign, requires SIGN_NEG=1, SIGN_POS=0 */
+
+	movl	EXP(%esi),%edx
+	movl	EXP(%ebx),%eax
+	subl	%eax,%edx		/* exponent of a minus exponent of b */
+	addl	EXP_BIAS,%edx
+	movl	%edx,EXP(%edi)
+
+	jmp	_divide_kernel
+
+
+/*-----------------------------------------------------------------------*/
+L_div_special:
+	cmpb	TW_NaN,TAG(%esi)	/* A NaN with anything to give NaN */
+	je	L_arg1_NaN
+
+	cmpb	TW_NaN,TAG(%ebx)	/* A NaN with anything to give NaN */
+	jne	L_no_NaN_arg
+
+/* Operations on NaNs */
+L_arg1_NaN:
+L_arg2_NaN:
+	pushl	%edi			/* Destination */
+	pushl	%esi
+	pushl	%ebx			/* Ordering is important here */
+	call	_real_2op_NaN
+	jmp	LDiv_exit		/* LDiv_exit resets %esp, so no pops here */
+
+/* Invalid operations */
+L_zero_zero:
+L_inf_inf:
+	pushl	%edi			/* Destination */
+	call	_arith_invalid		/* 0/0 or Infinity/Infinity */
+	jmp	LDiv_exit
+
+L_no_NaN_arg:
+	cmpb	TW_Infinity,TAG(%esi)
+	jne	L_arg1_not_inf
+
+	cmpb	TW_Infinity,TAG(%ebx)
+	je	L_inf_inf		/* invalid operation */
+
+	cmpb	TW_Valid,TAG(%ebx)
+	je	L_inf_valid
+
+#ifdef PARANOID
+	/* arg2 must be zero or valid */
+	cmpb	TW_Zero,TAG(%ebx)
+	ja	L_unknown_tags
+#endif /* PARANOID */
+
+	/* Note that p16-9 says that infinity/0 returns infinity */
+	jmp	L_copy_arg1		/* Answer is Inf */
+
+L_inf_valid:
+#ifdef DENORM_OPERAND
+	cmpl	EXP_UNDER,EXP(%ebx)
+	jg	L_copy_arg1		/* Answer is Inf */
+
+	call	_denormal_operand
+	orl	%eax,%eax
+	jnz	fpu_Arith_exit
+#endif /* DENORM_OPERAND */
+
+	jmp	L_copy_arg1		/* Answer is Inf */
+
+L_arg1_not_inf:
+	cmpb	TW_Zero,TAG(%ebx)	/* Priority to div-by-zero error */
+	jne	L_arg2_not_zero
+
+	cmpb	TW_Zero,TAG(%esi)
+	je	L_zero_zero		/* invalid operation */
+
+#ifdef PARANOID
+	/* arg1 must be valid */
+	cmpb	TW_Valid,TAG(%esi)
+	ja	L_unknown_tags
+#endif /* PARANOID */
+
+/* Division by zero error */
+	pushl	%edi			/* destination */
+	movb	SIGN(%esi),%al
+	xorb	SIGN(%ebx),%al
+	pushl	%eax			/* lower 8 bits have the sign */
+	call	_divide_by_zero
+	jmp	LDiv_exit
+
+L_arg2_not_zero:
+	cmpb	TW_Infinity,TAG(%ebx)
+	jne	L_arg2_not_inf
+
+#ifdef DENORM_OPERAND
+	cmpb	TW_Valid,TAG(%esi)
+	jne	L_return_zero
+
+	cmpl	EXP_UNDER,EXP(%esi)
+	jg	L_return_zero		/* Answer is zero */
+
+	call	_denormal_operand
+	orl	%eax,%eax
+	jnz	fpu_Arith_exit
+#endif /* DENORM_OPERAND */
+
+	jmp	L_return_zero		/* Answer is zero */
+
+L_arg2_not_inf:
+
+#ifdef PARANOID
+	cmpb	TW_Zero,TAG(%esi)
+	jne	L_unknown_tags
+#endif /* PARANOID */
+
+	/* arg1 is zero, arg2 is not Infinity or a NaN */
+
+#ifdef DENORM_OPERAND
+	cmpl	EXP_UNDER,EXP(%ebx)
+	jg	L_copy_arg1		/* Answer is zero */
+
+	call	_denormal_operand
+	orl	%eax,%eax
+	jnz	fpu_Arith_exit
+#endif /* DENORM_OPERAND */
+
+/* Copy tag, exponent and significand of arg1 to dest; the sign is
+   worked out separately below */
+L_copy_arg1:
+	movb	TAG(%esi),%al		/* the tag is one byte, so go through %al */
+	movb	%al,TAG(%edi)
+	movl	EXP(%esi),%eax
+	movl	%eax,EXP(%edi)
+	movl	SIGL(%esi),%eax
+	movl	%eax,SIGL(%edi)
+	movl	SIGH(%esi),%eax
+	movl	%eax,SIGH(%edi)
+
+/* The result is negative exactly when the operand signs differ */
+LDiv_set_result_sign:
+	movb	SIGN(%esi),%cl
+	cmpb	%cl,SIGN(%ebx)
+	jne	LDiv_negative_result
+
+	movb	SIGN_POS,SIGN(%edi)
+	xorl	%eax,%eax		/* Valid result */
+	jmp	LDiv_exit
+
+LDiv_negative_result:
+	movb	SIGN_NEG,SIGN(%edi)
+	xorl	%eax,%eax		/* Valid result */
+
+/* Point %esp back at the three saved registers, dropping anything
+   which was pushed as an argument on the way here */
+LDiv_exit:
+#ifndef NON_REENTRANT_FPU
+	leal	-40(%ebp),%esp		/* 28 bytes of locals + 12 of saved registers */
+#else
+	leal	-12(%ebp),%esp		/* 12 bytes of saved registers */
+#endif /* NON_REENTRANT_FPU */
+
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
+
+
+L_return_zero:
+	xorl	%eax,%eax
+	movl	%eax,SIGH(%edi)
+	movl	%eax,SIGL(%edi)
+	movl	EXP_UNDER,EXP(%edi)
+	movb	TW_Zero,TAG(%edi)
+	jmp	LDiv_set_result_sign
+
+#ifdef PARANOID
+L_unknown_tags:
+	pushl	EX_INTERNAL | 0x208
+	call	EXCEPTION
+
+	/* Generate a NaN for unknown tags */
+	/* NOTE(review): this copies the longs at offsets 0, 4 and 8 of
+	   CONST_QNaN to (%edi), SIGL and SIGH of dest -- confirm that
+	   these offsets match the FPU_REG layout. */
+	movl	_CONST_QNaN,%eax
+	movl	%eax,(%edi)
+	movl	_CONST_QNaN+4,%eax
+	movl	%eax,SIGL(%edi)
+	movl	_CONST_QNaN+8,%eax
+	movl	%eax,SIGH(%edi)
+	jmp	LDiv_exit		/* %eax is nz */
+#endif /* PARANOID */
diff --git a/arch/i386/math-emu/reg_ld_str.c b/arch/i386/math-emu/reg_ld_str.c
new file mode 100644
index 000000000..efec9e010
--- /dev/null
+++ b/arch/i386/math-emu/reg_ld_str.c
@@ -0,0 +1,1438 @@
+/*---------------------------------------------------------------------------+
+ |  reg_ld_str.c                                                             |
+ |                                                                           |
+ | All of the functions which transfer data between user memory and FPU_REGs.|
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Note:                                                                     |
+ |    The file contains code which accesses user memory.                     |
+ |    Emulator static data may change when user memory is accessed, due to   |
+ |    other processes using the emulator while swapping is in progress.      |
+ +---------------------------------------------------------------------------*/
+
+#include <asm/segment.h>
+
+#include "fpu_system.h"
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "control_w.h"
+#include "status_w.h"
+
+
+#define EXTENDED_Ebias 0x3fff
+#define EXTENDED_Emin (-0x3ffe)  /* smallest valid exponent */
+
+#define DOUBLE_Emax 1023         /* largest valid exponent */
+#define DOUBLE_Ebias 1023
+#define DOUBLE_Emin (-1022)      /* smallest valid exponent */
+
+#define SINGLE_Emax 127          /* largest valid exponent */
+#define SINGLE_Ebias 127
+#define SINGLE_Emin (-126)       /* smallest valid exponent */
+
+static void write_to_extended(FPU_REG *rp, char *d);
+
+
+/*
+ * Get a long double from user memory.
+ *
+ * Reads the 10 bytes at user address s as two 32-bit significand words
+ * (low word first) followed by a 16-bit sign/exponent word, and unpacks
+ * them into *loaded_data (tag, sign, exp, sigh, sigl).
+ *
+ * Returns 1 if the value was a pseudodenormal, an unsupported NaN or an
+ * unnormal (the latter two are replaced by a negative SNaN pattern);
+ * returns 0 in every other case.
+ */
+int reg_load_extended(long double *s, FPU_REG *loaded_data)
+{
+  unsigned long sigl, sigh, exp;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_READ, s, 10);
+  sigl = get_fs_long((unsigned long *) s);
+  sigh = get_fs_long(1 + (unsigned long *) s);
+  exp = get_fs_word(4 + (unsigned short *) s);
+  RE_ENTRANT_CHECK_ON;
+
+  loaded_data->tag = TW_Valid;   /* Default */
+  loaded_data->sigl = sigl;
+  loaded_data->sigh = sigh;
+  /* Bit 15 of the sign/exponent word is the sign. */
+  if (exp & 0x8000)
+    loaded_data->sign = SIGN_NEG;
+  else
+    loaded_data->sign = SIGN_POS;
+  /* Drop the sign bit, then re-bias from the extended format's bias
+     to the emulator's internal EXP_BIAS. */
+  exp &= 0x7fff;
+  loaded_data->exp = exp - EXTENDED_Ebias + EXP_BIAS;
+
+  if ( exp == 0 )
+    {
+      if ( !(sigh | sigl) )
+	{
+	  loaded_data->tag = TW_Zero;
+	  return 0;
+	}
+      /* The number is a de-normal or pseudodenormal. */
+      if (sigh & 0x80000000)
+	{
+	  /* Is a pseudodenormal. */
+	  /* Convert it for internal use. */
+	  /* This is non-80486 behaviour because the number
+	     loses its 'denormal' identity. */
+	  loaded_data->exp++;
+	  return 1;
+	}
+      else
+	{
+	  /* Is a denormal. */
+	  /* Convert it for internal use. */
+	  loaded_data->exp++;
+	  normalize_nuo(loaded_data);
+	  return 0;
+	}
+    }
+  else if ( exp == 0x7fff )
+    {
+      /* Maximum exponent: Infinity or some kind of NaN. */
+      if ( !((sigh ^ 0x80000000) | sigl) )
+	{
+	  /* Matches the bit pattern for Infinity. */
+	  loaded_data->exp = EXP_Infinity;
+	  loaded_data->tag = TW_Infinity;
+	  return 0;
+	}
+
+      loaded_data->exp = EXP_NaN;
+      loaded_data->tag = TW_NaN;
+      if ( !(sigh & 0x80000000) )
+	{
+	  /* NaNs have the ms bit set to 1. */
+	  /* This is therefore an Unsupported NaN data type. */
+	  /* This is non 80486 behaviour */
+	  /* This should generate an Invalid Operand exception
+	     later, so we convert it to a SNaN */
+	  loaded_data->sigh = 0x80000000;
+	  loaded_data->sigl = 0x00000001;
+	  loaded_data->sign = SIGN_NEG;
+	  return 1;
+	}
+      return 0;
+    }
+
+  /* Exponent is neither 0 nor 0x7fff: an ordinary number, which must
+     have its most significant bit set. */
+  if ( !(sigh & 0x80000000) )
+    {
+      /* Unsupported data type. */
+      /* Valid numbers have the ms bit set to 1. */
+      /* Unnormal. */
+      /* Convert it for internal use. */
+      /* This is non-80486 behaviour */
+      /* This should generate an Invalid Operand exception
+	 later, so we convert it to a SNaN */
+      loaded_data->sigh = 0x80000000;
+      loaded_data->sigl = 0x00000001;
+      loaded_data->sign = SIGN_NEG;
+      loaded_data->exp = EXP_NaN;
+      loaded_data->tag = TW_NaN;
+      return 1;
+    }
+  return 0;
+}
+
+
+/*
+ * Get a double from user memory.
+ *
+ * Reads the 8-byte value at user address dfloat and unpacks it into
+ * *loaded_data.  The 52 fraction bits are shifted left by 11 so that
+ * they sit directly below bit 31 of sigh; that top bit is forced on for
+ * normal numbers and NaNs.  A zero keeps the sign of the source value.
+ *
+ * Returns the result of denormal_operand() when the source is a
+ * de-normal, and 0 otherwise.  NaNs are loaded as they are; the calling
+ * function must look for them.
+ */
+int reg_load_double(double *dfloat, FPU_REG *loaded_data)
+{
+  unsigned word_hi, word_lo;	/* the two 32-bit halves as read */
+  unsigned frac_hi;		/* upper 20 bits of the fraction */
+  unsigned sig_hi, sig_lo;	/* fraction, left-justified below bit 63 */
+  int unbiased;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_READ, dfloat, 8);
+  word_hi = get_fs_long(1 + (unsigned long *) dfloat);
+  word_lo = get_fs_long((unsigned long *) dfloat);
+  RE_ENTRANT_CHECK_ON;
+
+  loaded_data->sign = (word_hi & 0x80000000) ? SIGN_NEG : SIGN_POS;
+
+  unbiased = ((word_hi & 0x7ff00000) >> 20) - DOUBLE_Ebias;
+  frac_hi = word_hi & 0xfffff;
+  sig_hi = (frac_hi << 11) | (word_lo >> 21);
+  sig_lo = word_lo << 11;
+
+  if ( unbiased > DOUBLE_Emax )
+    {
+      /* Infinity or NaN */
+      if ( !(frac_hi | word_lo) )
+	{
+	  /* +- infinity */
+	  loaded_data->tag = TW_Infinity;
+	  loaded_data->exp = EXP_Infinity;
+	  loaded_data->sigh = 0x80000000;
+	  loaded_data->sigl = 0x00000000;
+	  return 0;
+	}
+
+      /* Must be a signaling or quiet NaN */
+      loaded_data->tag = TW_NaN;
+      loaded_data->exp = EXP_NaN;
+      loaded_data->sigh = sig_hi | 0x80000000;
+      loaded_data->sigl = sig_lo;
+      return 0; /* The calling function must look for NaNs */
+    }
+
+  if ( unbiased < DOUBLE_Emin )
+    {
+      /* Zero or de-normal */
+      if ( !(frac_hi | word_lo) )
+	{
+	  /* Zero: copy the constant, then restore the decoded sign */
+	  int saved_sign = loaded_data->sign;
+	  reg_move(&CONST_Z, loaded_data);
+	  loaded_data->sign = saved_sign;
+	  return 0;
+	}
+
+      /* De-normal: there is no implied leading one, so normalize */
+      loaded_data->tag = TW_Valid;
+      loaded_data->exp = DOUBLE_Emin + EXP_BIAS;
+      loaded_data->sigh = sig_hi;
+      loaded_data->sigl = sig_lo;
+      normalize_nuo(loaded_data);
+      return denormal_operand();
+    }
+
+  /* An ordinary normalized number */
+  loaded_data->tag = TW_Valid;
+  loaded_data->exp = unbiased + EXP_BIAS;
+  loaded_data->sigh = sig_hi | 0x80000000;
+  loaded_data->sigl = sig_lo;
+  return 0;
+}
+
+
+/*
+ * Get a float from user memory.
+ *
+ * Reads the 4-byte value at user address single and unpacks it into
+ * *loaded_data.  The 23 fraction bits are shifted left by 8 so that
+ * they sit directly below bit 31 of sigh; sigl is always zero.  A zero
+ * keeps the sign of the source value.
+ *
+ * Returns the result of denormal_operand() when the source is a
+ * de-normal, and 0 otherwise.  NaNs are loaded as they are; the calling
+ * function must look for them.
+ */
+int reg_load_single(float *single, FPU_REG *loaded_data)
+{
+  unsigned raw;		/* the 32 bits as read */
+  unsigned frac;	/* fraction, left-justified below bit 31 */
+  int unbiased;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_READ, single, 4);
+  raw = get_fs_long((unsigned long *) single);
+  RE_ENTRANT_CHECK_ON;
+
+  loaded_data->sign = (raw & 0x80000000) ? SIGN_NEG : SIGN_POS;
+
+  if ( (raw & 0x7fffffff) == 0 )
+    {
+      /* Zero: copy the constant, then restore the decoded sign */
+      int saved_sign = loaded_data->sign;
+      reg_move(&CONST_Z, loaded_data);
+      loaded_data->sign = saved_sign;
+      return 0;
+    }
+
+  unbiased = ((raw & 0x7f800000) >> 23) - SINGLE_Ebias;
+  frac = (raw & 0x7fffff) << 8;
+
+  if ( unbiased > SINGLE_Emax )
+    {
+      /* Infinity or NaN */
+      if ( frac == 0 )
+	{
+	  /* +- infinity */
+	  loaded_data->tag = TW_Infinity;
+	  loaded_data->exp = EXP_Infinity;
+	  loaded_data->sigh = 0x80000000;
+	  loaded_data->sigl = 0x00000000;
+	  return 0;
+	}
+
+      /* Must be a signaling or quiet NaN */
+      loaded_data->tag = TW_NaN;
+      loaded_data->exp = EXP_NaN;
+      loaded_data->sigh = frac | 0x80000000;
+      loaded_data->sigl = 0;
+      return 0; /* The calling function must look for NaNs */
+    }
+
+  if ( unbiased < SINGLE_Emin )
+    {
+      /* De-normal: there is no implied leading one, so normalize */
+      loaded_data->tag = TW_Valid;
+      loaded_data->exp = SINGLE_Emin + EXP_BIAS;
+      loaded_data->sigh = frac;
+      loaded_data->sigl = 0;
+      normalize_nuo(loaded_data);
+      return denormal_operand();
+    }
+
+  /* An ordinary normalized number */
+  loaded_data->tag = TW_Valid;
+  loaded_data->exp = unbiased + EXP_BIAS;
+  loaded_data->sigh = frac | 0x80000000;
+  loaded_data->sigl = 0;
+  return 0;
+}
+
+
+/*
+ * Get a long long from user memory.
+ *
+ * Reads the 64-bit signed integer at user address _s as two 32-bit
+ * words and converts it to an FPU_REG: the magnitude is stored in the
+ * significand with exponent EXP_BIAS + 63 and then normalized.
+ * A zero input yields a copy of CONST_Z.
+ */
+void reg_load_int64(long long *_s, FPU_REG *loaded_data)
+{
+  long long value;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_READ, _s, 8);
+  ((unsigned long *)&value)[0] = get_fs_long((unsigned long *) _s);
+  ((unsigned long *)&value)[1] = get_fs_long(1 + (unsigned long *) _s);
+  RE_ENTRANT_CHECK_ON;
+
+  if ( value == 0 )
+    {
+      reg_move(&CONST_Z, loaded_data);
+      return;
+    }
+
+  /* Split into sign and magnitude */
+  if ( value < 0 )
+    {
+      loaded_data->sign = SIGN_NEG;
+      value = -value;
+    }
+  else
+    loaded_data->sign = SIGN_POS;
+
+  loaded_data->tag = TW_Valid;
+  loaded_data->exp = EXP_BIAS + 63;
+  significand(loaded_data) = value;
+  normalize_nuo(loaded_data);
+}
+
+
+/*
+ * Get a long from user memory.
+ *
+ * Reads the 32-bit signed integer at user address _s and converts it to
+ * an FPU_REG: the magnitude is stored in sigh (sigl = 0) with exponent
+ * EXP_BIAS + 31 and then normalized.  A zero input yields a copy of
+ * CONST_Z.
+ */
+void reg_load_int32(long *_s, FPU_REG *loaded_data)
+{
+  long value;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_READ, _s, 4);
+  value = (long)get_fs_long((unsigned long *) _s);
+  RE_ENTRANT_CHECK_ON;
+
+  if ( value == 0 )
+    {
+      reg_move(&CONST_Z, loaded_data);
+      return;
+    }
+
+  /* Split into sign and magnitude */
+  if ( value < 0 )
+    {
+      loaded_data->sign = SIGN_NEG;
+      value = -value;
+    }
+  else
+    loaded_data->sign = SIGN_POS;
+
+  loaded_data->tag = TW_Valid;
+  loaded_data->exp = EXP_BIAS + 31;
+  loaded_data->sigh = value;
+  loaded_data->sigl = 0;
+  normalize_nuo(loaded_data);
+}
+
+
+/*
+ * Get a short from user memory.
+ *
+ * Reads the 16-bit signed integer at user address _s and converts it to
+ * an FPU_REG: the magnitude is placed in the upper half of sigh
+ * (sigl = 0) with exponent EXP_BIAS + 15 and then normalized.  A zero
+ * input yields a copy of CONST_Z.
+ */
+void reg_load_int16(short *_s, FPU_REG *loaded_data)
+{
+  int value;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_READ, _s, 2);
+  /* Cast as short to get the sign extended. */
+  value = (short)get_fs_word((unsigned short *) _s);
+  RE_ENTRANT_CHECK_ON;
+
+  if ( value == 0 )
+    {
+      reg_move(&CONST_Z, loaded_data);
+      return;
+    }
+
+  /* Split into sign and magnitude */
+  if ( value < 0 )
+    {
+      loaded_data->sign = SIGN_NEG;
+      value = -value;
+    }
+  else
+    loaded_data->sign = SIGN_POS;
+
+  loaded_data->tag = TW_Valid;
+  loaded_data->exp = EXP_BIAS + 15;
+  loaded_data->sigh = value << 16;
+  loaded_data->sigl = 0;
+  normalize_nuo(loaded_data);
+}
+
+
+/*
+ * Get a packed bcd array from user memory.
+ *
+ * Reads the 10 bytes at user address s.  Bytes 8 down to 0 each hold
+ * two decimal digits (high nibble first); they are accumulated into a
+ * 64-bit integer.  Bit 7 of byte 9 supplies the sign.  The result is
+ * stored in *loaded_data as a normalized value, or as a signed copy of
+ * CONST_Z when all digits are zero.
+ */
+void reg_load_bcd(char *s, FPU_REG *loaded_data)
+{
+  long long total = 0;
+  unsigned char pair, top_byte;
+  int idx = 9;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_READ, s, 10);
+  RE_ENTRANT_CHECK_ON;
+
+  /* Most significant digit pair first */
+  while ( --idx >= 0 )
+    {
+      RE_ENTRANT_CHECK_OFF;
+      pair = (unsigned char)get_fs_byte((unsigned char *) s+idx);
+      RE_ENTRANT_CHECK_ON;
+      total = (total * 10 + (pair >> 4)) * 10 + (pair & 0x0f);
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  top_byte = (unsigned char)get_fs_byte((unsigned char *) s+9);
+  if ( top_byte & 0x80 )
+    loaded_data->sign = SIGN_NEG;
+  else
+    loaded_data->sign = SIGN_POS;
+  RE_ENTRANT_CHECK_ON;
+
+  if ( total != 0 )
+    {
+      significand(loaded_data) = total;
+      loaded_data->exp = EXP_BIAS + 63;
+      loaded_data->tag = TW_Valid;
+      normalize_nuo(loaded_data);
+    }
+  else
+    {
+      /* Zero: copy the constant, then restore the decoded sign */
+      char saved_sign = loaded_data->sign;
+      reg_move(&CONST_Z, loaded_data);
+      loaded_data->sign = saved_sign;
+    }
+}
+
+/*===========================================================================*/
+
+/*
+ * Put a long double into user memory.
+ *
+ * The only exception raised by an attempt to store to an extended
+ * format is the Invalid Stack exception, i.e. attempting to store from
+ * an empty register.
+ *
+ * Returns 1 when 10 bytes were written at user address d (either the
+ * register contents or, for an empty register with the invalid
+ * exception masked, the QNaN indefinite); returns 0 when nothing was
+ * stored.
+ */
+int reg_store_extended(long double *d, FPU_REG *st0_ptr)
+{
+  if ( st0_ptr->tag == TW_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      if ( !(control_word & CW_Invalid) )
+	return 0;
+
+      /* The masked response: put out the QNaN indefinite */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_verify_area(VERIFY_WRITE,d,10);
+      put_fs_long(0, (unsigned long *) d);
+      put_fs_long(0xc0000000, 1 + (unsigned long *) d);
+      put_fs_word(0xffff, 4 + (short *) d);
+      RE_ENTRANT_CHECK_ON;
+      return 1;
+    }
+
+  /* Any non-empty register is written out as it stands */
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_WRITE, d, 10);
+  RE_ENTRANT_CHECK_ON;
+  write_to_extended(st0_ptr, (char *) d);
+  return 1;
+}
+
+
+/*
+ * Put a double into user memory.
+ *
+ * Converts *st0_ptr to the 8-byte double format and writes it to user
+ * address dfloat.  TW_Valid values are rounded to 53 bits according to
+ * the rounding-control field of control_word; results below DOUBLE_Emin
+ * are converted through round_to_int(), results above DOUBLE_Emax
+ * overflow to infinity.  Zero, Infinity, NaN and Empty tags each have
+ * their own encoding below.
+ *
+ * Returns 1 when a value was written, 0 when an exception whose
+ * control_word mask bit is clear prevented the store.
+ */
+int reg_store_double(double *dfloat, FPU_REG *st0_ptr)
+{
+  unsigned long l[2];
+  unsigned long increment = 0;	/* avoid gcc warnings */
+  char st0_tag = st0_ptr->tag;
+
+  if (st0_tag == TW_Valid)
+    {
+      int exp;
+      FPU_REG tmp;
+
+      /* Work on a copy so that the register itself is not modified. */
+      reg_move(st0_ptr, &tmp);
+      exp = tmp.exp - EXP_BIAS;
+
+      if ( exp < DOUBLE_Emin )     /* It may be a denormal */
+	{
+	  int precision_loss;
+
+	  /* A denormal will always underflow. */
+#ifndef PECULIAR_486
+	  /* An 80486 is supposed to be able to generate
+	     a denormal exception here, but... */
+	  if ( st0_ptr->exp <= EXP_UNDER )
+	    {
+	      /* Underflow has priority. */
+	      if ( control_word & CW_Underflow )
+		denormal_operand();
+	    }
+#endif PECULIAR_486
+
+	  tmp.exp += -DOUBLE_Emin + 52;  /* largest exp to be 51 */
+
+	  if ( (precision_loss = round_to_int(&tmp)) )
+	    {
+#ifdef PECULIAR_486
+	      /* Did it round to a non-denormal ? */
+	      /* This behaviour might be regarded as peculiar, it appears
+		 that the 80486 rounds to the dest precision, then
+		 converts to decide underflow. */
+	      if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) &&
+		  (st0_ptr->sigl & 0x000007ff)) )
+#endif PECULIAR_486
+		{
+		  EXCEPTION(EX_Underflow);
+		  /* This is a special case: see sec 16.2.5.1 of
+		     the 80486 book */
+		  if ( !(control_word & CW_Underflow) )
+		    return 0;
+		}
+	      EXCEPTION(precision_loss);
+	      if ( !(control_word & CW_Precision) )
+		return 0;
+	    }
+	  /* The rounded integer is the denormal's bit pattern
+	     (exponent field left at zero). */
+	  l[0] = tmp.sigl;
+	  l[1] = tmp.sigh;
+	}
+      else
+	{
+	  /* The low 11 bits of sigl do not fit in a double. */
+	  if ( tmp.sigl & 0x000007ff )
+	    {
+	      switch (control_word & CW_RC)
+		{
+		case RC_RND:
+		  /* Rounding can get a little messy.. */
+		  increment = ((tmp.sigl & 0x7ff) > 0x400) |  /* nearest */
+		    ((tmp.sigl & 0xc00) == 0xc00);            /* odd -> even */
+		  break;
+		case RC_DOWN:   /* towards -infinity */
+		  increment = (tmp.sign == SIGN_POS) ? 0 : tmp.sigl & 0x7ff;
+		  break;
+		case RC_UP:     /* towards +infinity */
+		  increment = (tmp.sign == SIGN_POS) ? tmp.sigl & 0x7ff : 0;
+		  break;
+		case RC_CHOP:
+		  increment = 0;
+		  break;
+		}
+	  
+	      /* Truncate the mantissa */
+	      tmp.sigl &= 0xfffff800;
+	  
+	      if ( increment )
+		{
+		  set_precision_flag_up();
+
+		  if ( tmp.sigl >= 0xfffff800 )
+		    {
+		      /* the sigl part overflows */
+		      if ( tmp.sigh == 0xffffffff )
+			{
+			  /* The sigh part overflows */
+			  tmp.sigh = 0x80000000;
+			  exp++;
+			  if (exp >= EXP_OVER)
+			    goto overflow;
+			}
+		      else
+			{
+			  tmp.sigh ++;
+			}
+		      tmp.sigl = 0x00000000;
+		    }
+		  else
+		    {
+		      /* We only need to increment sigl */
+		      tmp.sigl += 0x00000800;
+		    }
+		}
+	      else
+		set_precision_flag_down();
+	    }
+	  
+	  /* Shift the 64-bit significand right by 11 and drop the
+	     explicit leading one, leaving the 52 fraction bits. */
+	  l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21);
+	  l[1] = ((tmp.sigh >> 11) & 0xfffff);
+
+	  if ( exp > DOUBLE_Emax )
+	    {
+	    overflow:
+	      EXCEPTION(EX_Overflow);
+	      if ( !(control_word & CW_Overflow) )
+		return 0;
+	      set_precision_flag_up();
+	      if ( !(control_word & CW_Precision) )
+		return 0;
+
+	      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+	      /* Overflow to infinity */
+	      l[0] = 0x00000000;	/* Set to */
+	      l[1] = 0x7ff00000;	/* + INF */
+	    }
+	  else
+	    {
+	      /* Add the exponent */
+	      l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20);
+	    }
+	}
+    }
+  else if (st0_tag == TW_Zero)
+    {
+      /* Number is zero */
+      l[0] = 0;
+      l[1] = 0;
+    }
+  else if (st0_tag == TW_Infinity)
+    {
+      l[0] = 0;
+      l[1] = 0x7ff00000;
+    }
+  else if (st0_tag == TW_NaN)
+    {
+      /* See if we can get a valid NaN from the FPU_REG */
+      l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21);
+      l[1] = ((st0_ptr->sigh >> 11) & 0xfffff);
+      if ( !(st0_ptr->sigh & 0x40000000) )
+	{
+	  /* It is a signalling NaN */
+	  EXCEPTION(EX_Invalid);
+	  if ( !(control_word & CW_Invalid) )
+	    return 0;
+	  /* Masked response: set the quiet bit in the stored NaN. */
+	  l[1] |= (0x40000000 >> 11);
+	}
+      l[1] |= 0x7ff00000;
+    }
+  else if ( st0_tag == TW_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      if ( control_word & CW_Invalid )
+	{
+	  /* The masked response */
+	  /* Put out the QNaN indefinite */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_verify_area(VERIFY_WRITE,(void *)dfloat,8);
+	  put_fs_long(0, (unsigned long *) dfloat);
+	  put_fs_long(0xfff80000, 1 + (unsigned long *) dfloat);
+	  RE_ENTRANT_CHECK_ON;
+	  return 1;
+	}
+      else
+	return 0;
+    }
+  /* NOTE(review): there is no final else here, so a tag outside the
+     cases above would leave l[] unset; reg_store_single has a PARANOID
+     branch for that situation. */
+  if ( st0_ptr->sign )
+    l[1] |= 0x80000000;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_WRITE,(void *)dfloat,8);
+  put_fs_long(l[0], (unsigned long *)dfloat);
+  put_fs_long(l[1], 1 + (unsigned long *)dfloat);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+
+/*
+ * Put a float into user memory.
+ *
+ * Converts *st0_ptr to the 4-byte single format and writes it to user
+ * address single.  TW_Valid values are rounded to 24 bits according to
+ * the rounding-control field of control_word; results below SINGLE_Emin
+ * are converted through round_to_int(), results above SINGLE_Emax
+ * overflow to infinity.  Zero, Infinity, NaN and Empty tags each have
+ * their own encoding below.
+ *
+ * All tests of control_word use the CW_* mask names, as
+ * reg_store_double does.
+ *
+ * Returns 1 when a value was written, 0 when an exception whose
+ * control_word mask bit is clear prevented the store (or, with
+ * PARANOID, when the tag is not recognised).
+ */
+int reg_store_single(float *single, FPU_REG *st0_ptr)
+{
+  long templ;
+  unsigned long increment = 0;     	/* avoid gcc warnings */
+  char st0_tag = st0_ptr->tag;
+
+  if (st0_tag == TW_Valid)
+    {
+      int exp;
+      FPU_REG tmp;
+
+      /* Work on a copy so that the register itself is not modified. */
+      reg_move(st0_ptr, &tmp);
+      exp = tmp.exp - EXP_BIAS;
+
+      if ( exp < SINGLE_Emin )
+	{
+	  int precision_loss;
+
+	  /* A denormal will always underflow. */
+#ifndef PECULIAR_486
+	  /* An 80486 is supposed to be able to generate
+	     a denormal exception here, but... */
+	  if ( st0_ptr->exp <= EXP_UNDER )
+	    {
+	      /* Underflow has priority. */
+	      if ( control_word & CW_Underflow )
+		denormal_operand();
+	    }
+#endif PECULIAR_486
+
+	  tmp.exp += -SINGLE_Emin + 23;  /* largest exp to be 22 */
+
+	  if ( (precision_loss = round_to_int(&tmp)) )
+	    {
+#ifdef PECULIAR_486
+	      /* Did it round to a non-denormal ? */
+	      /* This behaviour might be regarded as peculiar, it appears
+		 that the 80486 rounds to the dest precision, then
+		 converts to decide underflow. */
+	      if ( !((tmp.sigl == 0x00800000) &&
+		  ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) )
+#endif PECULIAR_486
+		{
+		  EXCEPTION(EX_Underflow);
+		  /* This is a special case: see sec 16.2.5.1 of
+		     the 80486 book */
+		  if ( !(control_word & CW_Underflow) )
+		    return 0;
+		}
+	      EXCEPTION(precision_loss);
+	      if ( !(control_word & CW_Precision) )
+		return 0;
+	    }
+	  /* The rounded integer is the denormal's bit pattern
+	     (exponent field left at zero). */
+	  templ = tmp.sigl;
+	}
+      else
+	{
+	  /* Everything below the top 24 bits does not fit in a single. */
+	  if ( tmp.sigl | (tmp.sigh & 0x000000ff) )
+	    {
+	      unsigned long sigh = tmp.sigh;
+	      unsigned long sigl = tmp.sigl;
+
+	      switch (control_word & CW_RC)
+		{
+		case RC_RND:
+		  increment = ((sigh & 0xff) > 0x80)       /* more than half */
+		    || (((sigh & 0xff) == 0x80) && sigl)   /* more than half */
+		      || ((sigh & 0x180) == 0x180);        /* round to even */
+		  break;
+		case RC_DOWN:   /* towards -infinity */
+		  increment = (tmp.sign == SIGN_POS)
+		              ? 0 : (sigl | (sigh & 0xff));
+		  break;
+		case RC_UP:     /* towards +infinity */
+		  increment = (tmp.sign == SIGN_POS)
+		              ? (sigl | (sigh & 0xff)) : 0;
+		  break;
+		case RC_CHOP:
+		  increment = 0;
+		  break;
+		}
+
+	      /* Truncate part of the mantissa */
+	      tmp.sigl = 0;
+
+	      if (increment)
+		{
+		  set_precision_flag_up();
+
+		  if ( sigh >= 0xffffff00 )
+		    {
+		      /* The sigh part overflows */
+		      tmp.sigh = 0x80000000;
+		      exp++;
+		      if ( exp >= EXP_OVER )
+			goto overflow;
+		    }
+		  else
+		    {
+		      tmp.sigh &= 0xffffff00;
+		      tmp.sigh += 0x100;
+		    }
+		}
+	      else
+		{
+		  set_precision_flag_down();
+		  tmp.sigh &= 0xffffff00;  /* Finish the truncation */
+		}
+	    }
+
+	  /* Shift right by 8 and drop the explicit leading one,
+	     leaving the 23 fraction bits. */
+	  templ = (tmp.sigh >> 8) & 0x007fffff;
+
+	  if ( exp > SINGLE_Emax )
+	    {
+	    overflow:
+	      EXCEPTION(EX_Overflow);
+	      if ( !(control_word & CW_Overflow) )
+		return 0;
+	      set_precision_flag_up();
+	      if ( !(control_word & CW_Precision) )
+		return 0;
+
+	      /* This is a special case: see sec 16.2.5.1 of the 80486 book. */
+	      /* Masked response is overflow to infinity. */
+	      templ = 0x7f800000;
+	    }
+	  else
+	    templ |= ((exp+SINGLE_Ebias) & 0xff) << 23;
+	}
+    }
+  else if (st0_tag == TW_Zero)
+    {
+      templ = 0;
+    }
+  else if (st0_tag == TW_Infinity)
+    {
+      templ = 0x7f800000;
+    }
+  else if (st0_tag == TW_NaN)
+    {
+      /* See if we can get a valid NaN from the FPU_REG */
+      templ = st0_ptr->sigh >> 8;
+      if ( !(st0_ptr->sigh & 0x40000000) )
+	{
+	  /* It is a signalling NaN */
+	  EXCEPTION(EX_Invalid);
+	  if ( !(control_word & CW_Invalid) )
+	    return 0;
+	  /* Masked response: set the quiet bit in the stored NaN. */
+	  templ |= (0x40000000 >> 8);
+	}
+      templ |= 0x7f800000;
+    }
+  else if ( st0_tag == TW_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      if ( control_word & CW_Invalid )
+	{
+	  /* The masked response */
+	  /* Put out the QNaN indefinite */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_verify_area(VERIFY_WRITE,(void *)single,4);
+	  put_fs_long(0xffc00000, (unsigned long *) single);
+	  RE_ENTRANT_CHECK_ON;
+	  return 1;
+	}
+      else
+	return 0;
+    }
+#ifdef PARANOID
+  else
+    {
+      /* A tag value that none of the cases above recognise. */
+      EXCEPTION(EX_INTERNAL|0x163);
+      return 0;
+    }
+#endif
+  if (st0_ptr->sign)
+    templ |= 0x80000000;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_WRITE,(void *)single,4);
+  put_fs_long(templ,(unsigned long *) single);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+
+/*
+ * Put a long long into user memory.
+ *
+ * Rounds *st0_ptr to an integer with round_to_int() and writes it as a
+ * 64-bit signed value to user address d.  An empty register, an
+ * Infinity or NaN, an overflow reported by round_to_int() (return
+ * value 1) or a magnitude that does not fit in 64 signed bits raises an
+ * exception; if the invalid-operation mask bit (CW_Invalid) is set in
+ * control_word, 0x8000000000000000 is stored instead.
+ *
+ * Returns 1 when a value was written, 0 when the invalid exception is
+ * not masked and nothing was stored.
+ */
+int reg_store_int64(long long *d, FPU_REG *st0_ptr)
+{
+  FPU_REG t;
+  long long tll;
+  int precision_loss;
+  char st0_tag = st0_ptr->tag;
+
+  if ( st0_tag == TW_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      goto invalid_operand;
+    }
+  else if ( (st0_tag == TW_Infinity) ||
+	   (st0_tag == TW_NaN) )
+    {
+      EXCEPTION(EX_Invalid);
+      goto invalid_operand;
+    }
+
+  /* Round a copy so that the register itself is not modified. */
+  reg_move(st0_ptr, &t);
+  precision_loss = round_to_int(&t);
+  ((long *)&tll)[0] = t.sigl;
+  ((long *)&tll)[1] = t.sigh;
+  /* Bit 63 set means the magnitude is too large, except for exactly
+     -2^63 which is representable. */
+  if ( (precision_loss == 1) ||
+      ((t.sigh & 0x80000000) &&
+       !((t.sigh == 0x80000000) && (t.sigl == 0) &&
+	 (t.sign == SIGN_NEG))) )
+    {
+      EXCEPTION(EX_Invalid);
+      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+    invalid_operand:
+      /* Test the control word with its own mask name (CW_Invalid),
+	 as the other store routines in this file do. */
+      if ( control_word & CW_Invalid )
+	{
+	  /* Produce something like QNaN "indefinite" */
+	  tll = 0x8000000000000000LL;
+	}
+      else
+	return 0;
+    }
+  else
+    {
+      if ( precision_loss )
+	set_precision_flag(precision_loss);
+      if ( t.sign )
+	tll = - tll;
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_WRITE,(void *)d,8);
+  put_fs_long(((long *)&tll)[0],(unsigned long *) d);
+  put_fs_long(((long *)&tll)[1],1 + (unsigned long *) d);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+
+/*
+ * Put a long into user memory.
+ *
+ * Rounds *st0_ptr to an integer with round_to_int() and writes it as a
+ * 32-bit signed value to user address d.  An empty register, an
+ * Infinity or NaN, or a magnitude that does not fit in 32 signed bits
+ * raises an exception; if the invalid-operation mask bit (CW_Invalid)
+ * is set in control_word, 0x80000000 is stored instead.
+ *
+ * Returns 1 when a value was written, 0 when the invalid exception is
+ * not masked and nothing was stored.
+ */
+int reg_store_int32(long *d, FPU_REG *st0_ptr)
+{
+  FPU_REG t;
+  int precision_loss;
+  char st0_tag = st0_ptr->tag;
+
+  if ( st0_tag == TW_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      goto invalid_operand;
+    }
+  else if ( (st0_tag == TW_Infinity) ||
+	   (st0_tag == TW_NaN) )
+    {
+      EXCEPTION(EX_Invalid);
+      goto invalid_operand;
+    }
+
+  /* Round a copy so that the register itself is not modified. */
+  reg_move(st0_ptr, &t);
+  precision_loss = round_to_int(&t);
+  /* Anything in sigh, or bit 31 of sigl, means the magnitude is too
+     large, except for exactly -2^31 which is representable. */
+  if (t.sigh ||
+      ((t.sigl & 0x80000000) &&
+       !((t.sigl == 0x80000000) && (t.sign == SIGN_NEG))) )
+    {
+      EXCEPTION(EX_Invalid);
+      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+    invalid_operand:
+      /* Test the control word with its own mask name (CW_Invalid),
+	 as the other store routines in this file do. */
+      if ( control_word & CW_Invalid )
+	{
+	  /* Produce something like QNaN "indefinite" */
+	  t.sigl = 0x80000000;
+	}
+      else
+	return 0;
+    }
+  else
+    {
+      if ( precision_loss )
+	set_precision_flag(precision_loss);
+      if ( t.sign )
+	t.sigl = -(long)t.sigl;
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_WRITE,d,4);
+  put_fs_long(t.sigl, (unsigned long *) d);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+
+/*
+ * Put a short into user memory.
+ *
+ * Rounds *st0_ptr to an integer with round_to_int() and writes it as a
+ * 16-bit signed value to user address d.  An empty register, an
+ * Infinity or NaN, or a magnitude that does not fit in 16 signed bits
+ * raises an exception; if the invalid-operation mask bit (CW_Invalid)
+ * is set in control_word, 0x8000 is stored instead.
+ *
+ * Returns 1 when a value was written, 0 when the invalid exception is
+ * not masked and nothing was stored.
+ */
+int reg_store_int16(short *d, FPU_REG *st0_ptr)
+{
+  FPU_REG t;
+  int precision_loss;
+  char st0_tag = st0_ptr->tag;
+
+  if ( st0_tag == TW_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      goto invalid_operand;
+    }
+  else if ( (st0_tag == TW_Infinity) ||
+	   (st0_tag == TW_NaN) )
+    {
+      EXCEPTION(EX_Invalid);
+      goto invalid_operand;
+    }
+
+  /* Round a copy so that the register itself is not modified. */
+  reg_move(st0_ptr, &t);
+  precision_loss = round_to_int(&t);
+  /* Anything in sigh, or at bit 15 and above in sigl, means the
+     magnitude is too large, except for exactly -2^15 which is
+     representable. */
+  if (t.sigh ||
+      ((t.sigl & 0xffff8000) &&
+       !((t.sigl == 0x8000) && (t.sign == SIGN_NEG))) )
+    {
+      EXCEPTION(EX_Invalid);
+      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+    invalid_operand:
+      /* Test the control word with its own mask name (CW_Invalid),
+	 as the other store routines in this file do. */
+      if ( control_word & CW_Invalid )
+	{
+	  /* Produce something like QNaN "indefinite" */
+	  t.sigl = 0x8000;
+	}
+      else
+	return 0;
+    }
+  else
+    {
+      if ( precision_loss )
+	set_precision_flag(precision_loss);
+      if ( t.sign )
+	t.sigl = -t.sigl;
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_WRITE,d,2);
+  put_fs_word((short)t.sigl,(short *) d);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+
+/*
+ * Put a packed bcd array into user memory.
+ *
+ * Rounds *st0_ptr to an integer with round_to_int() and writes it to
+ * the 10 bytes at user address d: bytes 0..8 hold two decimal digits
+ * each (least significant pair first, low digit in the low nibble) and
+ * byte 9 holds 0x80 for a negative value or 0 otherwise.
+ *
+ * An empty register, or a magnitude above 999999999999999999, raises
+ * an exception; if CW_Invalid is set in control_word the "indefinite"
+ * pattern is written instead.
+ *
+ * Returns 1 when 10 bytes were written, 0 when the invalid exception
+ * is not masked and nothing was stored.
+ */
+int reg_store_bcd(char *d, FPU_REG *st0_ptr)
+{
+  FPU_REG t;
+  unsigned long long ll;
+  unsigned char b;
+  int i, precision_loss;
+  unsigned char sign = (st0_ptr->sign == SIGN_NEG) ? 0x80 : 0;
+  char st0_tag = st0_ptr->tag;
+
+  if ( st0_tag == TW_Empty )
+    {
+      /* Empty register (stack underflow) */
+      EXCEPTION(EX_StackUnder);
+      goto invalid_operand;
+    }
+
+  /* Round a copy so that the register itself is not modified. */
+  reg_move(st0_ptr, &t);
+  precision_loss = round_to_int(&t);
+  ll = significand(&t);
+
+  /* Check for overflow, by comparing with 999999999999999999 decimal. */
+  if ( (t.sigh > 0x0de0b6b3) ||
+      ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) )
+    {
+      EXCEPTION(EX_Invalid);
+      /* This is a special case: see sec 16.2.5.1 of the 80486 book */
+    invalid_operand:
+      if ( control_word & CW_Invalid )
+	{
+	  /* Produce the QNaN "indefinite" */
+	  RE_ENTRANT_CHECK_OFF;
+	  FPU_verify_area(VERIFY_WRITE,d,10);
+	  for ( i = 0; i < 7; i++)
+	    put_fs_byte(0, (unsigned char *) d+i); /* These bytes "undefined" */
+	  put_fs_byte(0xc0, (unsigned char *) d+7); /* This byte "undefined" */
+	  put_fs_byte(0xff, (unsigned char *) d+8);
+	  put_fs_byte(0xff, (unsigned char *) d+9);
+	  RE_ENTRANT_CHECK_ON;
+	  return 1;
+	}
+      else
+	return 0;
+    }
+  else if ( precision_loss )
+    {
+      /* Precision loss doesn't stop the data transfer */
+      set_precision_flag(precision_loss);
+    }
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_WRITE,d,10);
+  RE_ENTRANT_CHECK_ON;
+  /* Peel off two decimal digits per output byte; div_small() is used
+     here as returning the remainder and leaving the quotient in ll
+     (TODO confirm against div_small's definition). */
+  for ( i = 0; i < 9; i++)
+    {
+      b = div_small(&ll, 10);
+      b |= (div_small(&ll, 10)) << 4;
+      RE_ENTRANT_CHECK_OFF;
+      put_fs_byte(b,(unsigned char *) d+i);
+      RE_ENTRANT_CHECK_ON;
+    }
+  RE_ENTRANT_CHECK_OFF;
+  put_fs_byte(sign,(unsigned char *) d+9);
+  RE_ENTRANT_CHECK_ON;
+
+  return 1;
+}
+
+/*===========================================================================*/
+
+/*
+ * Round the register *r to an integer, in place.
+ *
+ * On return the 64-bit significand of r holds the integer magnitude;
+ * r is NOT normalized, and its sign field is left unchanged.
+ *
+ * Return value:
+ *   0                     the result is exact,
+ *   PRECISION_LOST_UP     bits were discarded and the result was
+ *                         incremented,
+ *   PRECISION_LOST_DOWN   bits were discarded without incrementing,
+ *   1                     overflow; when the exponent was too large the
+ *                         significand is set to the largest possible
+ *                         value.
+ *
+ * The rounding direction comes from the CW_RC field of control_word.
+ */
+int round_to_int(FPU_REG *r)
+{
+  char     very_big;
+  unsigned eax;
+
+  if (r->tag == TW_Zero)
+    {
+      /* Make sure that zero is returned */
+      significand(r) = 0;
+      return 0;        /* o.k. */
+    }
+  
+  if (r->exp > EXP_BIAS + 63)
+    {
+      r->sigl = r->sigh = ~0;      /* The largest representable number */
+      return 1;        /* overflow */
+    }
+
+  /* Shift the significand right so that its lsb has weight 1.  eax is
+     treated below as a summary of the bits shifted out (presumably msb
+     = the half bit, lsb = sticky -- confirm against shrxs). */
+  eax = shrxs(&r->sigl, EXP_BIAS + 63 - r->exp);
+  very_big = !(~(r->sigh) | ~(r->sigl));  /* test for 0xfff...fff */
+#define	half_or_more	(eax & 0x80000000)
+#define	frac_part	(eax)
+#define more_than_half  ((eax & 0x80000001) == 0x80000001)
+  switch (control_word & CW_RC)
+    {
+    case RC_RND:
+      if ( more_than_half               	/* nearest */
+	  || (half_or_more && (r->sigl & 1)) )	/* odd -> even */
+	{
+	  if ( very_big ) return 1;        /* overflow */
+	  significand(r) ++;
+	  return PRECISION_LOST_UP;
+	}
+      break;
+    case RC_DOWN:
+      /* Towards -infinity: only a negative value grows in magnitude. */
+      if (frac_part && r->sign)
+	{
+	  if ( very_big ) return 1;        /* overflow */
+	  significand(r) ++;
+	  return PRECISION_LOST_UP;
+	}
+      break;
+    case RC_UP:
+      /* Towards +infinity: only a positive value grows in magnitude. */
+      if (frac_part && !r->sign)
+	{
+	  if ( very_big ) return 1;        /* overflow */
+	  significand(r) ++;
+	  return PRECISION_LOST_UP;
+	}
+      break;
+    case RC_CHOP:
+      break;
+    }
+
+  /* No increment was made: report whether any bits were dropped. */
+  return eax ? PRECISION_LOST_DOWN : 0;
+
+}
+
+/*===========================================================================*/
+
+/*
+ * Load the FPU environment from user memory at s.
+ *
+ * Two layouts are handled: a 14-byte (0x0e) image of 16-bit fields,
+ * used when the default mode is VM86 or when exactly one of "default
+ * mode is PM16" and "operand size prefix present" holds, and a 28-byte
+ * (0x1c) image otherwise.  The control word, status word, tag word and
+ * the instruction/operand addresses are loaded, top is re-derived from
+ * the status word, the summary bits are recomputed, and the register
+ * tags are updated from the loaded tag word.
+ *
+ * Returns a pointer to the first byte after the environment image.
+ */
+char *fldenv(fpu_addr_modes addr_modes, char *s)
+{
+  unsigned short tag_word = 0;
+  unsigned char tag;
+  int i;
+
+  if ( (addr_modes.default_mode == VM86) ||
+      ((addr_modes.default_mode == PM16)
+      ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
+    {
+      /* 14-byte image: seven consecutive 16-bit fields. */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_verify_area(VERIFY_READ, s, 0x0e);
+      control_word = get_fs_word((unsigned short *) s);
+      partial_status = get_fs_word((unsigned short *) (s+2));
+      tag_word = get_fs_word((unsigned short *) (s+4));
+      instruction_address.offset = get_fs_word((unsigned short *) (s+6));
+      instruction_address.selector = get_fs_word((unsigned short *) (s+8));
+      operand_address.offset = get_fs_word((unsigned short *) (s+0x0a));
+      operand_address.selector = get_fs_word((unsigned short *) (s+0x0c));
+      RE_ENTRANT_CHECK_ON;
+      s += 0x0e;
+      if ( addr_modes.default_mode == VM86 )
+	{
+	  /* In VM86 mode the top four bits of each selector field
+	     supply bits 16..19 of the corresponding offset. */
+	  instruction_address.offset
+	    += (instruction_address.selector & 0xf000) << 4;
+	  operand_address.offset += (operand_address.selector & 0xf000) << 4;
+	}
+    }
+  else
+    {
+      /* 28-byte image. */
+      RE_ENTRANT_CHECK_OFF;
+      FPU_verify_area(VERIFY_READ, s, 0x1c);
+      control_word = get_fs_word((unsigned short *) s);
+      partial_status = get_fs_word((unsigned short *) (s+4));
+      tag_word = get_fs_word((unsigned short *) (s+8));
+      instruction_address.offset = get_fs_long((unsigned long *) (s+0x0c));
+      instruction_address.selector = get_fs_word((unsigned short *) (s+0x10));
+      instruction_address.opcode = get_fs_word((unsigned short *) (s+0x12));
+      operand_address.offset = get_fs_long((unsigned long *) (s+0x14));
+      operand_address.selector = get_fs_long((unsigned long *) (s+0x18));
+      RE_ENTRANT_CHECK_ON;
+      s += 0x1c;
+    }
+
+#ifdef PECULIAR_486
+  control_word &= ~0xe080;
+#endif PECULIAR_486
+
+  top = (partial_status >> SW_Top_Shift) & 7;
+
+  /* Set the summary bits if any exception flagged in the loaded status
+     word has its control_word mask bit clear; clear them otherwise. */
+  if ( partial_status & ~control_word & CW_Exceptions )
+    partial_status |= (SW_Summary | SW_Backward);
+  else
+    partial_status &= ~(SW_Summary | SW_Backward);
+
+  /* The tag word holds two bits per register, register 0 in the low
+     bits. */
+  for ( i = 0; i < 8; i++ )
+    {
+      tag = tag_word & 3;
+      tag_word >>= 2;
+
+      if ( tag == 3 )
+	/* New tag is empty.  Accept it */
+	regs[i].tag = TW_Empty;
+      else if ( regs[i].tag == TW_Empty )
+	{
+	  /* Old tag is empty and new tag is not empty.  New tag is determined
+	     by old reg contents */
+	  if ( regs[i].exp == EXP_BIAS - EXTENDED_Ebias )
+	    {
+	      if ( !(regs[i].sigl | regs[i].sigh) )
+		regs[i].tag = TW_Zero;
+	      else
+		regs[i].tag = TW_Valid;
+	    }
+	  else if ( regs[i].exp == 0x7fff + EXP_BIAS - EXTENDED_Ebias )
+	    {
+	      if ( !((regs[i].sigh & ~0x80000000) | regs[i].sigl) )
+		regs[i].tag = TW_Infinity;
+	      else
+		regs[i].tag = TW_NaN;
+	    }
+	  else
+	    regs[i].tag = TW_Valid;
+  	}
+      /* Else old tag is not empty and new tag is not empty.  Old tag
+	 remains correct */
+    }
+
+  return s;
+}
+
+
+void frstor(fpu_addr_modes addr_modes, char *data_address)
+{
+  /* Restore the environment with fldenv(), which returns the address just
+     past the environment image, then load the eight 10 byte register
+     images which follow it.  Image k belongs to regs[(top + k) & 7]. */
+  char *image = fldenv(addr_modes, data_address);
+  int k;
+
+  for ( k = 0; k < 8; k++, image += 10 )
+    {
+      FPU_REG *r = &regs[(top + k) & 7];
+      unsigned char old_tag = r->tag;  /* as derived by fldenv() */
+      reg_load_extended((long double *) image, r);
+      if ( old_tag == TW_Empty )  /* Empty over-rides the loaded data. */
+	r->tag = old_tag;
+    }
+}
+
+
+unsigned short tag_word(void)
+{
+  /* Build the 16 bit tag word image from the internal register tags:
+     two bits per register, register 0 in the least significant pair.
+     Infinity, NaN, and Valid registers whose exponent is at or below
+     EXP_BIAS - EXTENDED_Ebias are reported as 2; Empty is reported
+     as 3; every other internal tag value is emitted unchanged. */
+  unsigned short image = 0;
+  int n = 8;
+
+  while ( n-- > 0 )
+    {
+      unsigned char t = regs[n].tag;
+
+      if ( t == TW_Valid )
+	{
+	  if ( regs[n].exp <= (EXP_BIAS - EXTENDED_Ebias) )
+	    t = 2;
+	}
+      else if ( (t == TW_Infinity) || (t == TW_NaN) )
+	t = 2;
+      else if ( t == TW_Empty )
+	t = 3;
+
+      image = (image << 2) | t;
+    }
+  return image;
+}
+
+
+char *fstenv(fpu_addr_modes addr_modes, char *d)
+{
+  /* Store the FPU environment at d (14 bytes for VM86 or 16 bit operand
+     size, else 28), mask all exceptions, and return the address after it. */
+  if ( (addr_modes.default_mode == VM86) ||
+      ((addr_modes.default_mode == PM16)
+      ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) )
+    {
+      RE_ENTRANT_CHECK_OFF;
+      FPU_verify_area(VERIFY_WRITE,d,14);
+#ifdef PECULIAR_486
+      put_fs_word(control_word & ~0xe080, (unsigned short *) d);
+#else
+      put_fs_word(control_word, (unsigned short *) d);
+#endif /* PECULIAR_486 */
+      put_fs_word(status_word(), (unsigned short *) (d+2));
+      put_fs_word(tag_word(), (unsigned short *) (d+4));
+      put_fs_word(instruction_address.offset, (unsigned short *) (d+6));
+      put_fs_word(operand_address.offset, (unsigned short *) (d+0x0a));
+      if ( addr_modes.default_mode == VM86 )
+	{
+	  put_fs_word((instruction_address.offset & 0xf0000) >> 4,
+		      (unsigned short *) (d+8));
+	  put_fs_word((operand_address.offset & 0xf0000) >> 4,
+		      (unsigned short *) (d+0x0c));
+	}
+      else
+	{
+	  put_fs_word(instruction_address.selector, (unsigned short *) (d+8));
+	  put_fs_word(operand_address.selector, (unsigned short *) (d+0x0c));
+	}
+      RE_ENTRANT_CHECK_ON;
+      d += 0x0e;
+    }
+  else
+    {
+      RE_ENTRANT_CHECK_OFF;
+      FPU_verify_area(VERIFY_WRITE,d,28);
+#ifdef PECULIAR_486
+      /* An 80486 sets all the reserved bits to 1. */
+      put_fs_long(0xffff0040 | (control_word & ~0xe080), (unsigned long *) d);
+      put_fs_long(0xffff0000 | status_word(), (unsigned long *) (d+4));
+      put_fs_long(0xffff0000 | tag_word(), (unsigned long *) (d+8));
+#else
+      put_fs_word(control_word, (unsigned short *) d);
+      put_fs_word(status_word(), (unsigned short *) (d+4));
+      put_fs_word(tag_word(), (unsigned short *) (d+8));
+#endif /* PECULIAR_486 */
+      put_fs_long(instruction_address.offset, (unsigned long *) (d+0x0c));
+      put_fs_word(instruction_address.selector, (unsigned short *) (d+0x10));
+      put_fs_word(instruction_address.opcode, (unsigned short *) (d+0x12));
+      put_fs_long(operand_address.offset, (unsigned long *) (d+0x14));
+#ifdef PECULIAR_486
+      /* An 80486 sets all the reserved bits to 1. */
+      put_fs_word(operand_address.selector, (unsigned short *) (d+0x18));
+      put_fs_word(0xffff, (unsigned short *) (d+0x1a));
+#else
+      put_fs_long(operand_address.selector, (unsigned long *) (d+0x18));
+#endif /* PECULIAR_486 */
+      RE_ENTRANT_CHECK_ON;
+      d += 0x1c;
+    }
+  control_word |= CW_Exceptions;
+  partial_status &= ~(SW_Summary | SW_Backward);
+  return d;
+}
+
+
+void fsave(fpu_addr_modes addr_modes, char *data_address)
+{
+  /* Store the environment with fstenv(), then the eight registers as
+     10 byte images in stack order from regs[top], and then call finit(). */
+  char *image = fstenv(addr_modes, data_address);
+  int k = 0;
+
+  RE_ENTRANT_CHECK_OFF;
+  FPU_verify_area(VERIFY_WRITE,image,80);  /* covers all 8 * 10 bytes */
+  RE_ENTRANT_CHECK_ON;
+  for ( ; k < 8; k++, image += 10 )
+    write_to_extended(&regs[(top + k) & 7], image);
+
+  finit();
+}
+
+/*===========================================================================*/
+
+/*
+  Store the register *rp at d as a 10 byte extended real: the 64 bit
+  significand followed by a 16 bit word holding the sign and exponent.
+  A call to this function must be preceded by a call to
+  FPU_verify_area() to verify access to the 10 bytes at d
+  */
+static void write_to_extended(FPU_REG *rp, char *d)
+{
+  FPU_REG tmp;
+  FPU_REG *src = rp;	/* the register whose significand gets stored */
+  long e = rp->exp - EXP_BIAS + EXTENDED_Ebias;
+
+#ifdef PARANOID
+  /* Check that the register contents agree with the tag. */
+  switch ( rp->tag )
+    {
+    case TW_Zero:
+      if ( rp->sigh | rp->sigl | e )
+	EXCEPTION(EX_INTERNAL | 0x160);
+      break;
+    case TW_Infinity:
+    case TW_NaN:
+      if ( (e ^ 0x7fff) | !(rp->sigh & 0x80000000) )
+	EXCEPTION(EX_INTERNAL | 0x161);
+      break;
+    default:
+      if (e > 0x7fff || e < -63)
+	EXCEPTION(EX_INTERNAL | 0x162);
+    }
+#endif /* PARANOID */
+
+  /*
+    All numbers except denormals are stored internally in a
+    format which is compatible with the extended real number
+    format, so when e > 0 the register itself is the source.
+   */
+  if ( e <= 0 )
+    {
+      /*
+	The number is a de-normal stored as a normal using our
+	extra exponent range, or is Zero.
+	Convert it back to a de-normal, or leave it as Zero.
+       */
+      reg_move(rp, &tmp);
+      tmp.exp += -EXTENDED_Emin + 63;  /* largest exp to be 63 */
+      round_to_int(&tmp);
+      src = &tmp;
+      e = 0;
+    }
+
+  /* The significand: low 32 bits at d, high 32 bits at d + 4. */
+  RE_ENTRANT_CHECK_OFF;
+  put_fs_long(src->sigl, (unsigned long *) d);
+  put_fs_long(src->sigh, (unsigned long *) (d + 4));
+  RE_ENTRANT_CHECK_ON;
+
+  /* Then the exponent word, with the sign in its top bit. */
+  if ( rp->sign != SIGN_POS )
+    e |= 0x8000;
+  RE_ENTRANT_CHECK_OFF;
+  put_fs_word(e, (unsigned short *) (d + 8));
+  RE_ENTRANT_CHECK_ON;
+}
diff --git a/arch/i386/math-emu/reg_mul.c b/arch/i386/math-emu/reg_mul.c
new file mode 100644
index 000000000..75246187b
--- /dev/null
+++ b/arch/i386/math-emu/reg_mul.c
@@ -0,0 +1,105 @@
+/*---------------------------------------------------------------------------+
+ |  reg_mul.c                                                                |
+ |                                                                           |
+ | Multiply one FPU_REG by another, put the result in a destination FPU_REG. |
+ |                                                                           |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | The destination may be any FPU_REG, including one of the source FPU_REGs. |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "reg_constant.h"
+#include "fpu_emu.h"
+#include "fpu_system.h"
+
+
+/* Multiply *a by *b into *dest.  Returns 0 after storing a product, zero or
+   infinity; 1 if reg_u_mul() or denormal_operand() returns non-zero or the
+   tags are unexpected; else whatever real_2op_NaN()/arith_invalid() return.
+   This routine must be called with non-empty source registers */
+int reg_mul(FPU_REG const *a, FPU_REG const *b,
+	    FPU_REG *dest, unsigned int control_w)
+{
+  char saved_sign = dest->sign;
+  char sign = (a->sign ^ b->sign);
+
+  if (!(a->tag | b->tag))
+    {
+      /* Both regs Valid, this should be the most common case. */
+      dest->sign = sign;
+      if ( reg_u_mul(a, b, dest, control_w) )
+	{
+	  dest->sign = saved_sign;
+	  return 1;
+	}
+      return 0;
+    }
+  else if ((a->tag <= TW_Zero) && (b->tag <= TW_Zero))
+    {
+#ifdef DENORM_OPERAND
+      if ( (((b->tag == TW_Valid) && (b->exp <= EXP_UNDER)) ||
+	    ((a->tag == TW_Valid) && (a->exp <= EXP_UNDER)))
+	  && denormal_operand() )
+	return 1;
+#endif /* DENORM_OPERAND */
+      /* Must have either both arguments == zero, or
+	 one valid and the other zero.
+	 The result is therefore zero. */
+      reg_move(&CONST_Z, dest);
+      /* The 80486 book says that the answer is +0, but a real
+	 80486 behaves this way.
+	 IEEE-754 apparently says it should be this way. */
+      dest->sign = sign;
+      return 0;
+    }
+  else
+    {
+      /* Must have infinities, NaNs, etc */
+      if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) )
+	{ return real_2op_NaN(a, b, dest); }
+      else if (a->tag == TW_Infinity)
+	{
+	  if (b->tag == TW_Zero)
+	    { return arith_invalid(dest); }  /* Zero*Infinity is invalid */
+	  else
+	    {
+#ifdef DENORM_OPERAND
+	      if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) &&
+		  denormal_operand() )
+		return 1;
+#endif /* DENORM_OPERAND */
+	      reg_move(a, dest);
+	      dest->sign = sign;
+	    }
+	  return 0;
+	}
+      else if (b->tag == TW_Infinity)
+	{
+	  if (a->tag == TW_Zero)
+	    { return arith_invalid(dest); }  /* Zero*Infinity is invalid */
+	  else
+	    {
+#ifdef DENORM_OPERAND
+	      if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) &&
+		  denormal_operand() )
+		return 1;
+#endif /* DENORM_OPERAND */
+	      reg_move(b, dest);
+	      dest->sign = sign;
+	    }
+	  return 0;
+	}
+#ifdef PARANOID
+      EXCEPTION(EX_INTERNAL|0x102);
+#endif /* PARANOID */
+      /* Unexpected tags: always return a defined value, PARANOID or not. */
+      return 1;
+    }
+}
diff --git a/arch/i386/math-emu/reg_norm.S b/arch/i386/math-emu/reg_norm.S
new file mode 100644
index 000000000..9b7a9d77d
--- /dev/null
+++ b/arch/i386/math-emu/reg_norm.S
@@ -0,0 +1,150 @@
+/*---------------------------------------------------------------------------+
+ |  reg_norm.S                                                               |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | Normalize the value in a FPU_REG.                                         |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void normalize(FPU_REG *n)                                              |
+ |                                                                           |
+ |   void normalize_nuo(FPU_REG *n)                                          |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_asm.h"
+
+
+.text
+
+	.align 2,144
+.globl _normalize
+/* Left-justify the significand of a register, then range-check its exponent. */
+_normalize:
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%ebx
+
+	movl	PARAM1,%ebx	/* the FPU_REG to work on (presumably argument n) */
+
+#ifdef PARANOID
+	cmpb	TW_Valid,TAG(%ebx)
+	je	L_ok
+
+	pushl	$0x220		/* tag is not TW_Valid: report it ... */
+	call	_exception
+	addl	$4,%esp		/* ... drop the argument and carry on regardless */
+
+L_ok:
+#endif PARANOID
+
+	movl	SIGH(%ebx),%edx
+	movl	SIGL(%ebx),%eax
+
+	orl	%edx,%edx	/* ms bits */
+	js	L_done		/* Already normalized */
+	jnz	L_shift_1	/* Shift left 1 - 31 bits */
+
+	orl	%eax,%eax
+	jz	L_zero		/* The contents are zero */
+
+	movl	%eax,%edx	/* ms word is zero: shift left by a whole word */
+	xorl	%eax,%eax
+	subl	$32,EXP(%ebx)	/* This can cause an underflow */
+
+/* We need to shift left by 1 - 31 bits (0 is possible when falling through) */
+L_shift_1:
+	bsrl	%edx,%ecx	/* get the required shift in %ecx */
+	subl	$31,%ecx
+	negl	%ecx		/* %ecx = 31 - index of the top set bit */
+	shld	%cl,%eax,%edx
+	shl	%cl,%eax
+	subl	%ecx,EXP(%ebx)	/* This can cause an underflow */
+
+	movl	%edx,SIGH(%ebx)
+	movl	%eax,SIGL(%ebx)
+
+L_done:
+	cmpl	EXP_OVER,EXP(%ebx)
+	jge	L_overflow	/* exponent >= EXP_OVER */
+
+	cmpl	EXP_UNDER,EXP(%ebx)
+	jle	L_underflow	/* exponent <= EXP_UNDER */
+
+L_exit:				/* also the exit used by _normalize_nuo */
+	popl	%ebx
+	leave
+	ret
+
+
+L_zero:				/* all 64 bits clear; also used by _normalize_nuo */
+	movl	EXP_UNDER,EXP(%ebx)
+	movb	TW_Zero,TAG(%ebx)
+	jmp	L_exit
+
+L_underflow:
+	push	%ebx		/* argument: the register */
+	call	_arith_underflow
+	pop	%ebx		/* remove the argument again */
+	jmp	L_exit
+
+L_overflow:
+	push	%ebx		/* argument: the register */
+	call	_arith_overflow
+	pop	%ebx		/* remove the argument again */
+	jmp	L_exit
+
+
+
+/* Normalise without reporting underflow or overflow */
+	.align 2,144
+.globl _normalize_nuo
+/* Shares the L_exit and L_zero code of _normalize above. */
+_normalize_nuo:
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%ebx
+
+	movl	PARAM1,%ebx	/* the FPU_REG to work on (presumably argument n) */
+
+#ifdef PARANOID
+	cmpb	TW_Valid,TAG(%ebx)
+	je	L_ok_nuo
+
+	pushl	$0x221		/* tag is not TW_Valid: report it ... */
+	call	_exception
+	addl	$4,%esp		/* ... drop the argument and carry on regardless */
+
+L_ok_nuo:
+#endif PARANOID
+
+	movl	SIGH(%ebx),%edx
+	movl	SIGL(%ebx),%eax
+
+	orl	%edx,%edx	/* ms bits */
+	js	L_exit		/* Already normalized */
+	jnz	L_nuo_shift_1	/* Shift left 1 - 31 bits */
+
+	orl	%eax,%eax
+	jz	L_zero		/* The contents are zero */
+
+	movl	%eax,%edx	/* ms word is zero: shift left by a whole word */
+	xorl	%eax,%eax
+	subl	$32,EXP(%ebx)	/* may pass EXP_UNDER; not checked in this routine */
+
+/* We need to shift left by 1 - 31 bits (0 is possible when falling through) */
+L_nuo_shift_1:
+	bsrl	%edx,%ecx	/* get the required shift in %ecx */
+	subl	$31,%ecx
+	negl	%ecx		/* %ecx = 31 - index of the top set bit */
+	shld	%cl,%eax,%edx
+	shl	%cl,%eax
+	subl	%ecx,EXP(%ebx)	/* may pass EXP_UNDER; not checked in this routine */
+
+	movl	%edx,SIGH(%ebx)
+	movl	%eax,SIGL(%ebx)
+	jmp	L_exit
+
+
diff --git a/arch/i386/math-emu/reg_round.S b/arch/i386/math-emu/reg_round.S
new file mode 100644
index 000000000..bd8a40dc4
--- /dev/null
+++ b/arch/i386/math-emu/reg_round.S
@@ -0,0 +1,701 @@
+	.file "reg_round.S"
+/*---------------------------------------------------------------------------+
+ |  reg_round.S                                                              |
+ |                                                                           |
+ | Rounding/truncation/etc for FPU basic arithmetic functions.               |
+ |                                                                           |
+ | Copyright (C) 1993                                                        |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | This code has four possible entry points.                                 |
+ | The following must be entered by a jmp instruction:                       |
+ |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
+ |                                                                           |
+ | The _round_reg entry point is intended to be used by C code.              |
+ | From C, call as:                                                          |
+ | void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
+ |                                                                           |
+ | For correct "up" and "down" rounding, the argument must have the correct  |
+ | sign.                                                                     |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ | Four entry points.                                                        |
+ |                                                                           |
+ | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
+ |  %eax:%ebx  64 bit significand                                            |
+ |  %edx       32 bit extension of the significand                           |
+ |  %edi       pointer to an FPU_REG for the result to be stored             |
+ |  stack      calling function must have set up a C stack frame and         |
+ |             pushed %esi, %edi, and %ebx                                   |
+ |                                                                           |
+ | Needed just for the fpu_reg_round_sqrt entry point:                       |
+ |  %cx  A control word in the same format as the FPU control word.          |
+ | Otherwise, PARAM4 must give such a value.                                 |
+ |                                                                           |
+ |                                                                           |
+ | The significand and its extension are assumed to be exact in the          |
+ | following sense:                                                          |
+ |   If the significand by itself is the exact result then the significand   |
+ |   extension (%edx) must contain 0, otherwise the significand extension    |
+ |   must be non-zero.                                                       |
+ |   If the significand extension is non-zero then the significand is        |
+ |   smaller than the magnitude of the correct exact result by an amount     |
+ |   greater than zero and less than one ls bit of the significand.          |
+ |   The significand extension is only required to have three possible       |
+ |   non-zero values:                                                        |
+ |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
+ |                                 bit smaller than the magnitude of the     |
+ |                                 true exact result.                        |
+ |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
+ |                                 smaller than the magnitude of the true    |
+ |                                 exact result.                             |
+ |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
+ |                                 bit smaller than the magnitude of the     |
+ |                                 true exact result.                        |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ |  The code in this module has become quite complex, but it should handle   |
+ |  all of the FPU flags which are set at this stage of the basic arithmetic |
+ |  computations.                                                            |
+ |  There are a few rare cases where the results are not set identically to  |
+ |  a real FPU. These require a bit more thought because at this stage the   |
+ |  results of the code here appear to be more consistent...                 |
+ |  This may be changed in a future version.                                 |
+ +---------------------------------------------------------------------------*/
+
+
+#include "fpu_asm.h"
+#include "exception.h"
+#include "control_w.h"
+
+/* Flags for FPU_bits_lost (0 means that no bits were lost in rounding) */
+#define	LOST_DOWN	$1
+#define	LOST_UP		$2
+
+/* Flags for FPU_denormal (0 means that the number is not a de-normal) */
+#define	DENORMAL	$1
+#define	UNMASKED_UNDERFLOW $2
+
+
+#ifndef NON_REENTRANT_FPU
+/*	Make the code re-entrant by putting
+	local storage on the stack: */
+#define FPU_bits_lost	(%esp)
+#define FPU_denormal	1(%esp)
+/* Both are %esp relative: only usable while nothing further is pushed. */
+#else
+/*	Not re-entrant, so we can gain speed by putting
+	local storage in a static area: */
+.data
+	.align 2,0
+FPU_bits_lost:
+	.byte	0
+FPU_denormal:
+	.byte	0
+#endif NON_REENTRANT_FPU
+
+
+.text
+	.align 2,144
+.globl fpu_reg_round
+.globl fpu_reg_round_sqrt
+.globl fpu_Arith_exit
+.globl _round_reg
+
+/* Entry point when called from C */
+_round_reg:
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx		/* the frame which the jmp entry points expect */
+
+	movl	PARAM1,%edi	/* presumably arg: the FPU_REG to be rounded */
+	movl	SIGH(%edi),%eax	/* %eax:%ebx = its 64 bit significand */
+	movl	SIGL(%edi),%ebx
+	movl	PARAM2,%edx	/* presumably extent: the significand extension */
+	movl	PARAM3,%ecx	/* presumably control_w */
+	jmp	fpu_reg_round_sqrt
+
+fpu_reg_round:			/* Normal entry point */
+	movl	PARAM4,%ecx	/* control word supplied by the caller's frame */
+
+fpu_reg_round_sqrt:		/* Entry point from wm_sqrt.S */
+
+#ifndef NON_REENTRANT_FPU
+	pushl	%ebx		/* make room for FPU_bits_lost and FPU_denormal */
+#endif NON_REENTRANT_FPU
+
+#ifdef PARANOID
+/* Cannot use this here yet */
+/*	orl	%eax,%eax */
+/*	jns	L_entry_bugged */
+#endif PARANOID
+
+	cmpl	EXP_UNDER,EXP(%edi)
+	jle	xMake_denorm			/* The number is a de-normal */
+
+	movb	$0,FPU_denormal			/* 0 -> not a de-normal */
+
+xDenorm_done:
+	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */
+
+	movl	%ecx,%esi	/* keep the whole control word in %esi */
+	andl	CW_PC,%ecx	/* select on the precision control field */
+	cmpl	PR_64_BITS,%ecx
+	je	LRound_To_64
+
+	cmpl	PR_53_BITS,%ecx
+	je	LRound_To_53
+
+	cmpl	PR_24_BITS,%ecx
+	je	LRound_To_24
+
+#ifdef PECULIAR_486
+/* With the precision control bits set to 01 "(reserved)", a real 80486
+   behaves as if the precision control bits were set to 11 "64 bits" */
+	cmpl	PR_RESERVED_BITS,%ecx
+	je	LRound_To_64
+#ifdef PARANOID
+	jmp	L_bugged_denorm_486
+#endif PARANOID
+#else
+#ifdef PARANOID
+	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
+#endif PARANOID
+#endif PECULIAR_486
+/* Without PARANOID, an unmatched precision value falls into LRound_To_24. */
+
+/* Round etc to 24 bit precision */
+LRound_To_24:
+	movl	%esi,%ecx
+	andl	CW_RC,%ecx		/* select on the rounding control field */
+	cmpl	RC_RND,%ecx
+	je	LRound_nearest_24
+
+	cmpl	RC_CHOP,%ecx
+	je	LCheck_truncate_24
+
+	cmpl	RC_UP,%ecx		/* Towards +infinity */
+	je	LUp_24
+
+	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
+	je	LDown_24
+
+#ifdef PARANOID
+	jmp	L_bugged_round24
+#endif PARANOID
+
+LUp_24:
+	cmpb	SIGN_POS,SIGN(%edi)
+	jne	LCheck_truncate_24	/* If negative then  up==truncate */
+
+	jmp	LCheck_24_round_up
+
+LDown_24:
+	cmpb	SIGN_POS,SIGN(%edi)
+	je	LCheck_truncate_24	/* If positive then  down==truncate */
+
+LCheck_24_round_up:
+	movl	%eax,%ecx
+	andl	$0x000000ff,%ecx	/* the low 8 bits of %eax are dropped ... */
+	orl	%ebx,%ecx		/* ... along with all of %ebx ... */
+	orl	%edx,%ecx		/* ... and the extension */
+	jnz	LDo_24_round_up		/* something would be lost: increment */
+	jmp	LRe_normalise		/* already exact at 24 bits */
+
+LRound_nearest_24:
+	/* Do rounding of the 24th bit if needed (nearest or even) */
+	movl	%eax,%ecx
+	andl	$0x000000ff,%ecx
+	cmpl	$0x00000080,%ecx
+	jc	LCheck_truncate_24	/* less than half, no increment needed */
+
+	jne	LGreater_Half_24	/* greater than half, increment needed */
+
+	/* Possibly half, we need to check the ls bits */
+	orl	%ebx,%ebx
+	jnz	LGreater_Half_24	/* greater than half, increment needed */
+
+	orl	%edx,%edx
+	jnz	LGreater_Half_24	/* greater than half, increment needed */
+
+	/* Exactly half, increment only if 24th bit is 1 (round to even) */
+	testl	$0x00000100,%eax
+	jz	LDo_truncate_24
+
+LGreater_Half_24:			/* Rounding: increment at the 24th bit */
+LDo_24_round_up:
+	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
+	xorl	%ebx,%ebx
+	movb	LOST_UP,FPU_bits_lost
+	addl	$0x00000100,%eax	/* the carry out is tested at the target */
+	jmp	LCheck_Round_Overflow
+
+LCheck_truncate_24:
+	movl	%eax,%ecx
+	andl	$0x000000ff,%ecx
+	orl	%ebx,%ecx
+	orl	%edx,%ecx
+	jz	LRe_normalise		/* No truncation needed */
+
+LDo_truncate_24:
+	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
+	xorl	%ebx,%ebx
+	movb	LOST_DOWN,FPU_bits_lost
+	jmp	LRe_normalise
+
+
+/* Round etc to 53 bit precision */
+LRound_To_53:
+	movl	%esi,%ecx
+	andl	CW_RC,%ecx		/* select on the rounding control field */
+	cmpl	RC_RND,%ecx
+	je	LRound_nearest_53
+
+	cmpl	RC_CHOP,%ecx
+	je	LCheck_truncate_53
+
+	cmpl	RC_UP,%ecx		/* Towards +infinity */
+	je	LUp_53
+
+	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
+	je	LDown_53
+
+#ifdef PARANOID
+	jmp	L_bugged_round53
+#endif PARANOID
+
+LUp_53:
+	cmpb	SIGN_POS,SIGN(%edi)
+	jne	LCheck_truncate_53	/* If negative then  up==truncate */
+
+	jmp	LCheck_53_round_up
+
+LDown_53:
+	cmpb	SIGN_POS,SIGN(%edi)
+	je	LCheck_truncate_53	/* If positive then  down==truncate */
+
+LCheck_53_round_up:
+	movl	%ebx,%ecx
+	andl	$0x000007ff,%ecx	/* the low 11 bits of %ebx are dropped ... */
+	orl	%edx,%ecx		/* ... along with the extension */
+	jnz	LDo_53_round_up		/* something would be lost: increment */
+	jmp	LRe_normalise		/* already exact at 53 bits */
+
+LRound_nearest_53:
+	/* Do rounding of the 53rd bit if needed (nearest or even) */
+	movl	%ebx,%ecx
+	andl	$0x000007ff,%ecx
+	cmpl	$0x00000400,%ecx
+	jc	LCheck_truncate_53	/* less than half, no increment needed */
+
+	jnz	LGreater_Half_53	/* greater than half, increment needed */
+
+	/* Possibly half, we need to check the ls bits */
+	orl	%edx,%edx
+	jnz	LGreater_Half_53	/* greater than half, increment needed */
+
+	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
+	testl	$0x00000800,%ebx
+	jz	LTruncate_53
+
+LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
+LDo_53_round_up:
+	movb	LOST_UP,FPU_bits_lost
+	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
+	addl	$0x00000800,%ebx
+	adcl	$0,%eax			/* the carry out is tested at the target */
+	jmp	LCheck_Round_Overflow
+
+LCheck_truncate_53:
+	movl	%ebx,%ecx
+	andl	$0x000007ff,%ecx
+	orl	%edx,%ecx
+	jz	LRe_normalise		/* No truncation needed */
+
+LTruncate_53:
+	movb	LOST_DOWN,FPU_bits_lost
+	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
+	jmp	LRe_normalise
+
+
+/* Round etc to 64 bit precision */
+LRound_To_64:
+	movl	%esi,%ecx
+	andl	CW_RC,%ecx		/* select on the rounding control field */
+	cmpl	RC_RND,%ecx
+	je	LRound_nearest_64
+
+	cmpl	RC_CHOP,%ecx
+	je	LCheck_truncate_64
+
+	cmpl	RC_UP,%ecx		/* Towards +infinity */
+	je	LUp_64
+
+	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
+	je	LDown_64
+
+#ifdef PARANOID
+	jmp	L_bugged_round64
+#endif /* PARANOID */
+
+LUp_64:
+	cmpb	SIGN_POS,SIGN(%edi)
+	jne	LCheck_truncate_64	/* If negative then  up==truncate */
+
+	orl	%edx,%edx		/* any extension bits at all? */
+	jnz	LDo_64_round_up
+	jmp	LRe_normalise		/* already exact at 64 bits */
+
+LDown_64:
+	cmpb	SIGN_POS,SIGN(%edi)
+	je	LCheck_truncate_64	/* If positive then  down==truncate */
+
+	orl	%edx,%edx		/* any extension bits at all? */
+	jnz	LDo_64_round_up
+	jmp	LRe_normalise		/* already exact at 64 bits */
+
+LRound_nearest_64:
+	cmpl	$0x80000000,%edx
+	jc	LCheck_truncate_64	/* less than half, no increment needed */
+
+	jne	LDo_64_round_up		/* greater than half, increment needed */
+
+	/* Now test for round-to-even */
+	testb	$1,%bl			/* a byte test needs a byte register */
+	jz	LCheck_truncate_64
+
+LDo_64_round_up:
+	movb	LOST_UP,FPU_bits_lost
+	addl	$1,%ebx
+	adcl	$0,%eax
+
+LCheck_Round_Overflow:			/* shared by the 24 and 53 bit code */
+	jnc	LRe_normalise		/* no carry out of the increment */
+
+	/* Overflow, adjust the result (significand to 1.0) */
+	rcrl	$1,%eax			/* the carry becomes the new ms bit */
+	rcrl	$1,%ebx
+	incl	EXP(%edi)
+	jmp	LRe_normalise
+
+LCheck_truncate_64:
+	orl	%edx,%edx
+	jz	LRe_normalise		/* No truncation needed */
+
+LTruncate_64:
+	movb	LOST_DOWN,FPU_bits_lost	/* then fall into LRe_normalise */
+
+LRe_normalise:
+	testb	$0xff,FPU_denormal
+	jnz	xNormalise_result	/* DENORMAL or UNMASKED_UNDERFLOW was set */
+
+xL_Normalised:
+	cmpb	LOST_UP,FPU_bits_lost
+	je	xL_precision_lost_up
+
+	cmpb	LOST_DOWN,FPU_bits_lost
+	je	xL_precision_lost_down
+
+xL_no_precision_loss:		/* the precision_lost code also returns here */
+	/* store the result */
+	movb	TW_Valid,TAG(%edi)
+
+xL_Store_significand:
+	movl	%eax,SIGH(%edi)
+	movl	%ebx,SIGL(%edi)
+
+	xorl	%eax,%eax	/* No errors detected. */
+
+	cmpl	EXP_OVER,EXP(%edi)
+	jge	L_overflow
+
+fpu_reg_round_exit:
+#ifndef NON_REENTRANT_FPU
+	popl	%ebx		/* discard the FPU_bits_lost/FPU_denormal dword */
+#endif NON_REENTRANT_FPU
+
+fpu_Arith_exit:			/* undo the frame set up by the caller */
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
+
+
+/*
+ * Set the FPU status flags to represent precision loss due to
+ * round-up.
+ */
+xL_precision_lost_up:
+	push	%eax		/* keep the ms significand word over the call */
+	call	_set_precision_flag_up
+	popl	%eax
+	jmp	xL_no_precision_loss
+
+/*
+ * Set the FPU status flags to represent precision loss due to
+ * truncation.
+ */
+xL_precision_lost_down:
+	push	%eax		/* keep the ms significand word over the call */
+	call	_set_precision_flag_down
+	popl	%eax
+	jmp	xL_no_precision_loss
+
+
+/*
+ * The number is a denormal (which might get rounded up to a normal)
+ * Shift the number right the required number of bits, which will
+ * have to be undone later...
+ */
+xMake_denorm:
+	/* The action to be taken depends upon whether the underflow
+	   exception is masked */
+	testb	CW_Underflow,%cl		/* Underflow mask. */
+	jz	xUnmasked_underflow		/* Do not make a denormal. */
+
+	movb	DENORMAL,FPU_denormal	/* before the push: may be %esp relative */
+
+	pushl	%ecx		/* Save */
+	movl	EXP_UNDER+1,%ecx
+	subl	EXP(%edi),%ecx	/* %ecx = right shift which gives EXP_UNDER+1 */
+
+	cmpl	$64,%ecx	/* 64 or more bits are handled separately */
+	jnc	xDenorm_shift_more_than_63
+
+	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
+	jnc	xDenorm_shift_more_than_32
+
+/*
+ * We got here without jumps by assuming that the most common requirement
+ *   is for a small de-normalising shift.
+ * Shift by [1..31] bits
+ */
+	addl	%ecx,EXP(%edi)
+	orl	%edx,%edx	/* extension */
+	setne	%ch		/* Save whether %edx is non-zero */
+	xorl	%edx,%edx
+	shrd	%cl,%ebx,%edx	/* bits shifted out become the new extension */
+	shrd	%cl,%eax,%ebx
+	shr	%cl,%eax
+	orb	%ch,%dl		/* keep a record of the old extension bits */
+	popl	%ecx		/* restore the control word */
+	jmp	xDenorm_done
+
+/* Shift by [32..63] bits */
+xDenorm_shift_more_than_32:
+	addl	%ecx,EXP(%edi)
+	subb	$32,%cl		/* the word moves below supply the other 32 */
+	orl	%edx,%edx
+	setne	%ch		/* Save whether %edx is non-zero */
+	orb	%ch,%bl
+	xorl	%edx,%edx
+	shrd	%cl,%ebx,%edx
+	shrd	%cl,%eax,%ebx
+	shr	%cl,%eax
+	orl	%edx,%edx		/* test these 32 bits */
+	setne	%cl
+	orb	%ch,%bl		/* fold both "bits were lost" flags into %bl */
+	orb	%cl,%bl
+	movl	%ebx,%edx	/* move everything down by one whole word */
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	popl	%ecx		/* restore the control word */
+	jmp	xDenorm_done
+
+/* Shift by [64..) bits */
+xDenorm_shift_more_than_63:
+	cmpl	$64,%ecx
+	jne	xDenorm_shift_more_than_64
+
+/* Exactly 64 bit shift */
+	addl	%ecx,EXP(%edi)
+	xorl	%ecx,%ecx
+	orl	%edx,%edx
+	setne	%cl
+	orl	%ebx,%ebx
+	setne	%ch
+	orb	%ch,%cl		/* set if anything below %eax was non-zero */
+	orb	%cl,%al
+	movl	%eax,%edx	/* the old ms word becomes the extension */
+	xorl	%eax,%eax
+	xorl	%ebx,%ebx
+	popl	%ecx		/* restore the control word */
+	jmp	xDenorm_done
+
+xDenorm_shift_more_than_64:
+	movl	EXP_UNDER+1,EXP(%edi)
+/* This is easy, %eax must be non-zero, so.. */
+	movl	$1,%edx		/* a non-zero extension under a zero significand */
+	xorl	%eax,%eax
+	xorl	%ebx,%ebx
+	popl	%ecx		/* restore the control word */
+	jmp	xDenorm_done
+
+
+xUnmasked_underflow:
+	movb	UNMASKED_UNDERFLOW,FPU_denormal	/* the number is left unshifted */
+	jmp	xDenorm_done
+
+
+/* Undo the de-normalisation. */
+xNormalise_result:
+	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
+	je	xSignal_underflow	/* nothing was shifted in that case */
+
+/* The number must be a denormal if we got here. */
+#ifdef PARANOID
+	/* But check it... just in case. */
+	cmpl	EXP_UNDER+1,EXP(%edi)
+	jne	L_norm_bugged
+#endif PARANOID
+
+#ifdef PECULIAR_486
+	/*
+	 * This implements a special feature of 80486 behaviour.
+	 * Underflow will be signalled even if the number is
+	 * not a denormal after rounding.
+	 * This difference occurs only for masked underflow, and not
+	 * in the unmasked case.
+	 * Actual 80486 behaviour differs from this in some circumstances.
+	 */
+	orl	%eax,%eax		/* ms bits */
+	js	LNormalise_shift_done	/* Will be masked underflow */
+#endif PECULIAR_486
+
+	orl	%eax,%eax		/* ms bits */
+	js	xL_Normalised		/* No longer a denormal */
+
+	jnz	LNormalise_shift_up_to_31	/* Shift left 0 - 31 bits */
+
+	orl	%ebx,%ebx
+	jz	L_underflow_to_zero	/* The contents are zero */
+
+/* Shift left 32 - 63 bits */
+	movl	%ebx,%eax
+	xorl	%ebx,%ebx
+	subl	$32,EXP(%edi)
+
+LNormalise_shift_up_to_31:
+	bsrl	%eax,%ecx	/* get the required shift in %ecx */
+	subl	$31,%ecx
+	negl	%ecx		/* %ecx = 31 - index of the top set bit */
+	shld	%cl,%ebx,%eax
+	shl	%cl,%ebx
+	subl	%ecx,EXP(%edi)
+
+LNormalise_shift_done:
+	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
+	jz	xL_Normalised
+
+	/* There must be a masked underflow */
+	push	%eax		/* keep the ms significand word over the call */
+	pushl	EX_Underflow
+	call	_exception
+	popl	%eax		/* remove the argument ... */
+	popl	%eax		/* ... and restore the saved %eax */
+	jmp	xL_Normalised
+
+
+/*
+ * The operations resulted in a number too small to represent.
+ * Masked response.
+ */
+L_underflow_to_zero:
+	push	%eax		/* keep %eax over the call */
+	call	_set_precision_flag_down
+	popl	%eax
+
+	push	%eax		/* keep %eax over the call */
+	pushl	EX_Underflow
+	call	_exception
+	popl	%eax		/* remove the argument ... */
+	popl	%eax		/* ... and restore the saved %eax */
+
+/* Reduce the exponent to EXP_UNDER */
+	movl	EXP_UNDER,EXP(%edi)
+	movb	TW_Zero,TAG(%edi)
+	jmp	xL_Store_significand	/* skips the TW_Valid tag store */
+
+
+/* The operations resulted in a number too large to represent. */
+L_overflow:
+	push	%edi		/* argument: the result register */
+	call	_arith_overflow
+	pop	%edi		/* remove the argument again */
+	jmp	fpu_reg_round_exit
+
+
+xSignal_underflow:
+	/* The number may have been changed to a non-denormal */
+	/* by the rounding operations. */
+	cmpl	EXP_UNDER,EXP(%edi)
+	jle	xDo_unmasked_underflow
+
+	jmp	xL_Normalised
+
+xDo_unmasked_underflow:
+	/* Increase the exponent by the magic number */
+	addl	$(3*(1<<13)),EXP(%edi)
+	push	%eax		/* keep the ms significand word over the call */
+	pushl	EX_Underflow
+	call	EXCEPTION	/* NOTE(review): other call sites use _exception */
+	popl	%eax		/* remove the argument ... */
+	popl	%eax		/* ... and restore the saved %eax */
+	jmp	xL_Normalised
+
+
+#ifdef PARANOID
+#ifdef PECULIAR_486
+L_bugged_denorm_486:		/* precision control field not recognised */
+	pushl	EX_INTERNAL|0x236
+	call	EXCEPTION
+	popl	%ebx		/* remove the argument */
+	jmp	L_exception_exit
+#else
+L_bugged_denorm:		/* precision control field not recognised */
+	pushl	EX_INTERNAL|0x230
+	call	EXCEPTION
+	popl	%ebx		/* remove the argument */
+	jmp	L_exception_exit
+#endif PECULIAR_486
+
+L_bugged_round24:		/* rounding control not recognised (24 bits) */
+	pushl	EX_INTERNAL|0x231
+	call	EXCEPTION
+	popl	%ebx		/* remove the argument */
+	jmp	L_exception_exit
+
+L_bugged_round53:		/* rounding control not recognised (53 bits) */
+	pushl	EX_INTERNAL|0x232
+	call	EXCEPTION
+	popl	%ebx		/* remove the argument */
+	jmp	L_exception_exit
+
+L_bugged_round64:		/* rounding control not recognised (64 bits) */
+	pushl	EX_INTERNAL|0x233
+	call	EXCEPTION
+	popl	%ebx		/* remove the argument */
+	jmp	L_exception_exit
+
+L_norm_bugged:			/* exponent was not EXP_UNDER+1 when re-normalising */
+	pushl	EX_INTERNAL|0x234
+	call	EXCEPTION
+	popl	%ebx		/* remove the argument */
+	jmp	L_exception_exit
+
+L_entry_bugged:			/* only named by the commented-out entry check */
+	pushl	EX_INTERNAL|0x235
+	call	EXCEPTION
+	popl	%ebx		/* remove the argument */
+L_exception_exit:
+	mov	$1,%eax		/* non-zero in %eax, unlike the normal exit path */
+	jmp	fpu_reg_round_exit
+#endif PARANOID
diff --git a/arch/i386/math-emu/reg_u_add.S b/arch/i386/math-emu/reg_u_add.S
new file mode 100644
index 000000000..4410f8fd4
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_add.S
@@ -0,0 +1,189 @@
+	.file	"reg_u_add.S"
+/*---------------------------------------------------------------------------+
+ |  reg_u_add.S                                                              |
+ |                                                                           |
+ | Add two valid (TW_Valid) FPU_REG numbers, of the same sign, and put the   |
+ |   result in a destination FPU_REG.                                        |
+ |                                                                           |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void reg_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ,             |
+ |                                                int control_w)             |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*
+ |    Kernel addition routine reg_u_add(reg *arg1, reg *arg2, reg *answ).
+ |    Takes two valid reg f.p. numbers (TW_Valid), which are
+ |    treated as unsigned numbers,
+ |    and returns their sum as a TW_Valid or TW_S f.p. number.
+ |    The returned number is normalized.
+ |    Basic checks are performed if PARANOID is defined.
+ */
+
+#include "exception.h"
+#include "fpu_asm.h"
+#include "control_w.h"
+
+.text
+	.align 2,144	/* pad with 0x90 (nop) bytes */
+.globl _reg_u_add
+_reg_u_add:	/* entry: set up the frame, load the two source pointers */
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%esi	/* %esi, %edi, %ebx are popped again on the exit paths */
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi		/* source 1 */
+	movl	PARAM2,%edi		/* source 2 */
+
+#ifdef DENORM_OPERAND
+	cmpl	EXP_UNDER,EXP(%esi)
+	jg	xOp1_not_denorm	/* exponent above EXP_UNDER */
+
+	call	_denormal_operand
+	orl	%eax,%eax	/* non-zero return: give up via fpu_Arith_exit */
+	jnz	fpu_Arith_exit
+
+xOp1_not_denorm:
+	cmpl	EXP_UNDER,EXP(%edi)
+	jg	xOp2_not_denorm	/* exponent above EXP_UNDER */
+
+	call	_denormal_operand
+	orl	%eax,%eax	/* non-zero return: give up via fpu_Arith_exit */
+	jnz	fpu_Arith_exit
+
+xOp2_not_denorm:
+#endif /* DENORM_OPERAND */
+
+	movl	EXP(%esi),%ecx	/* pick the operand with the smaller exponent */
+	subl	EXP(%edi),%ecx		/* exp1 - exp2 */
+	jge	L_arg1_larger
+
+	/* num1 is smaller */
+	movl	SIGL(%esi),%ebx	/* %eax:%ebx = significand of num1 */
+	movl	SIGH(%esi),%eax
+
+	movl	%edi,%esi	/* %esi now points at the larger operand */
+	negw	%cx		/* shift count = exp2 - exp1; only %cx is negated */
+	jmp	L_accum_loaded
+
+L_arg1_larger:
+	/* num1 has larger or equal exponent */
+	movl	SIGL(%edi),%ebx	/* %eax:%ebx = significand of num2 */
+	movl	SIGH(%edi),%eax
+
+L_accum_loaded:
+	movl	PARAM3,%edi		/* destination */
+/*	movb	SIGN(%esi),%dl
+	movb	%dl,SIGN(%edi) */	/* Copy the sign from the first arg */
+
+
+	movl	EXP(%esi),%edx	/* exponent of the larger operand */
+	movl	%edx,EXP(%edi)		/* Copy exponent to destination */
+
+	xorl	%edx,%edx		/* clear the extension */
+
+#ifdef PARANOID
+	testl	$0x80000000,%eax	/* ms bit of the operand to be shifted */
+	je	L_bugged
+
+	testl	$0x80000000,SIGH(%esi)	/* ms bit of the other operand */
+	je	L_bugged
+#endif /* PARANOID */
+
+/* The number to be shifted is in %eax:%ebx:%edx (ms:ls:extension), count in %cx */
+	cmpw	$32,%cx		/* shrd only works for 0..31 bits */
+	jnc	L_more_than_31
+
+/* less than 32 bits */
+	shrd	%cl,%ebx,%edx	/* bits leaving %ebx enter the extension */
+	shrd	%cl,%eax,%ebx
+	shr	%cl,%eax
+	jmp	L_shift_done
+
+L_more_than_31:	/* count >= 32 */
+	cmpw	$64,%cx
+	jnc	L_more_than_63
+
+	subb	$32,%cl		/* what is left after a whole-word move */
+	jz	L_exactly_32
+
+	shrd	%cl,%eax,%edx	/* extension receives the bits leaving %eax */
+	shr	%cl,%eax
+	orl	%ebx,%ebx
+	jz	L_more_31_no_low	/* none of the lowest bits is set */
+
+	orl	$1,%edx			/* record the fact in the extension */
+
+L_more_31_no_low:
+	movl	%eax,%ebx	/* shifted ms word becomes the ls word */
+	xorl	%eax,%eax
+	jmp	L_shift_done
+
+L_exactly_32:	/* pure word move: %eax -> %ebx -> %edx */
+	movl	%ebx,%edx
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	jmp	L_shift_done
+
+L_more_than_63:	/* count >= 64 */
+	cmpw	$65,%cx
+	jnc	L_more_than_64
+
+	movl	%eax,%edx	/* count == 64: ms word becomes the extension */
+	orl	%ebx,%ebx
+	jz	L_more_63_no_low
+
+	orl	$1,%edx		/* non-zero ls word survives as bit 0 only */
+	jmp	L_more_63_no_low
+
+L_more_than_64:	/* count >= 65 */
+	movl	$1,%edx		/* The shifted nr always at least one '1' */
+
+L_more_63_no_low:
+	xorl	%ebx,%ebx	/* both significand words are shifted out */
+	xorl	%eax,%eax
+
+L_shift_done:
+	/* Now do the addition */
+	addl	SIGL(%esi),%ebx	/* %esi points at the unshifted operand */
+	adcl	SIGH(%esi),%eax
+	jnc	L_round_the_result	/* no carry out of the ms word */
+
+	/* Overflow, adjust the result */
+	rcrl	$1,%eax		/* the carry becomes the new ms bit */
+	rcrl	$1,%ebx
+	rcrl	$1,%edx
+	jnc	L_no_bit_lost	/* the bit rotated out of the extension was 0 */
+
+	orl	$1,%edx		/* keep the lost bit in bit 0 of the extension */
+
+L_no_bit_lost:
+	incl	EXP(%edi)	/* account for the one-bit right shift */
+
+L_round_the_result:
+	jmp	fpu_reg_round	/* Round the result */
+
+
+
+#ifdef PARANOID
+/* If we ever get here then we have problems! */
+L_bugged:
+	pushl	EX_INTERNAL|0x201	/* argument for EXCEPTION */
+	call	EXCEPTION
+	pop	%ebx			/* pop the argument (into %ebx) */
+	movl	$1,%eax			/* failure status, as reg_u_sub's L_error_exit */
+	jmp	L_exit
+#endif /* PARANOID */
+
+L_exit:	/* common epilogue; only L_bugged jumps here */
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
diff --git a/arch/i386/math-emu/reg_u_div.S b/arch/i386/math-emu/reg_u_div.S
new file mode 100644
index 000000000..328e9116e
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_div.S
@@ -0,0 +1,477 @@
+	.file	"reg_u_div.S"
+/*---------------------------------------------------------------------------+
+ |  reg_u_div.S                                                              |
+ |                                                                           |
+ | Core division routines                                                    |
+ |                                                                           |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ |  Kernel for the division routines.                                        |
+ |                                                                           |
+ |  void reg_u_div(FPU_REG *a, FPU_REG *b,                                   |
+ |                 FPU_REG *dest, unsigned int control_word)                 |
+ |                                                                           |
+ |  Does not compute the destination exponent, but does adjust it.           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "fpu_asm.h"
+#include "control_w.h"
+
+
+/* #define	dSIGL(x)	(x) */
+/* #define	dSIGH(x)	4(x) */
+
+
+#ifndef NON_REENTRANT_FPU
+/*
+	Local storage on the stack (offsets from %ebp):
+	Working dividend: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
+	Quotient: FPU_result_2:FPU_result_1.  Overflow flag: FPU_ovfl_flag
+ */
+#define FPU_accum_3	-4(%ebp)
+#define FPU_accum_2	-8(%ebp)
+#define FPU_accum_1	-12(%ebp)
+#define FPU_accum_0	-16(%ebp)
+#define FPU_result_1	-20(%ebp)
+#define FPU_result_2	-24(%ebp)
+#define FPU_ovfl_flag	-28(%ebp)
+
+#else /* NON_REENTRANT_FPU */
+.data
+/*
+	Local storage in a static area:
+	Working dividend: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
+	Quotient: FPU_result_2:FPU_result_1.  Overflow flag: FPU_ovfl_flag
+ */
+	.align 2,0
+FPU_accum_3:
+	.long	0
+FPU_accum_2:
+	.long	0
+FPU_accum_1:
+	.long	0
+FPU_accum_0:
+	.long	0
+FPU_result_1:
+	.long	0
+FPU_result_2:
+	.long	0
+FPU_ovfl_flag:
+	.byte	0
+#endif /* NON_REENTRANT_FPU */
+
+
+.text
+	.align 2,144	/* pad with 0x90 (nop) bytes */
+
+.globl _reg_u_div
+
+.globl _divide_kernel
+
+_reg_u_div:	/* entry: frame, locals, pointers, optional denormal checks */
+	pushl	%ebp
+	movl	%esp,%ebp
+#ifndef NON_REENTRANT_FPU
+	subl	$28,%esp	/* room for the FPU_* locals at -4..-28(%ebp) */
+#endif /* NON_REENTRANT_FPU */
+
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi	/* pointer to num */
+	movl	PARAM2,%ebx	/* pointer to denom */
+	movl	PARAM3,%edi	/* pointer to answer */
+
+#ifdef DENORM_OPERAND
+	movl	EXP(%esi),%eax
+	cmpl	EXP_UNDER,%eax
+	jg	xOp1_not_denorm	/* exponent above EXP_UNDER */
+
+	call	_denormal_operand
+	orl	%eax,%eax	/* non-zero return: give up via fpu_Arith_exit */
+	jnz	fpu_Arith_exit
+
+xOp1_not_denorm:
+	movl	EXP(%ebx),%eax
+	cmpl	EXP_UNDER,%eax
+	jg	xOp2_not_denorm	/* exponent above EXP_UNDER */
+
+	call	_denormal_operand
+	orl	%eax,%eax	/* non-zero return: give up via fpu_Arith_exit */
+	jnz	fpu_Arith_exit
+
+xOp2_not_denorm:
+#endif /* DENORM_OPERAND */
+
+_divide_kernel:	/* exported label; uses %esi = num, %ebx = denom, %edi = answer */
+#ifdef PARANOID
+/*	testl	$0x80000000, SIGH(%esi)	// Dividend */
+/*	je	L_bugged */
+	testl	$0x80000000, SIGH(%ebx)	/* Divisor */
+	je	L_bugged
+#endif /* PARANOID */
+
+/* Check if the divisor can be treated as having just 32 bits */
+	cmpl	$0,SIGL(%ebx)
+	jnz	L_Full_Division	/* Can't do a quick divide */
+
+/* We should be able to zip through the division here */
+	movl	SIGH(%ebx),%ecx	/* The divisor */
+	movl	SIGH(%esi),%edx	/* Dividend */
+	movl	SIGL(%esi),%eax	/* Dividend */
+
+	cmpl	%ecx,%edx
+	setaeb	FPU_ovfl_flag	/* Keep a record */
+	jb	L_no_adjust	/* setcc leaves the flags of the cmpl intact */
+
+	subl	%ecx,%edx	/* Prevent the overflow */
+
+L_no_adjust:
+	/* Divide the 64 bit number by the 32 bit denominator */
+	divl	%ecx		/* quotient in %eax, remainder in %edx */
+	movl	%eax,FPU_result_2
+
+	/* Work on the remainder of the first division */
+	xorl	%eax,%eax
+	divl	%ecx
+	movl	%eax,FPU_result_1
+
+	/* Work on the remainder of the 64 bit division */
+	xorl	%eax,%eax
+	divl	%ecx		/* this quotient stays in %eax as the extension */
+
+	testb	$255,FPU_ovfl_flag	/* was the num > denom ? */
+	je	L_no_overflow
+
+	/* Do the shifting here */
+	/* increase the exponent */
+	incl	EXP(%edi)
+
+	/* shift the mantissa right one bit */
+	stc			/* To set the ms bit */
+	rcrl	FPU_result_2
+	rcrl	FPU_result_1
+	rcrl	%eax
+
+L_no_overflow:
+	jmp	LRound_precision	/* Do the rounding as required */
+
+
+/*---------------------------------------------------------------------------+
+ |  Divide:   Return  arg1/arg2 to arg3.                                     |
+ |                                                                           |
+ |  This routine does not use the exponents of arg1 and arg2, but does       |
+ |  adjust the exponent of arg3.                                             |
+ |                                                                           |
+ |  The maximum returned value is (ignoring exponents)                       |
+ |               .ffffffff ffffffff                                          |
+ |               ------------------  =  1.ffffffff fffffffe                  |
+ |               .80000000 00000000                                          |
+ | and the minimum is                                                        |
+ |               .80000000 00000000                                          |
+ |               ------------------  =  .80000000 00000001   (rounded)       |
+ |               .ffffffff ffffffff                                          |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+
+L_Full_Division:	/* 64-bit divisor: produce the first 32 quotient bits */
+	/* Save extended dividend in local register */
+	movl	SIGL(%esi),%eax
+	movl	%eax,FPU_accum_2
+	movl	SIGH(%esi),%eax
+	movl	%eax,FPU_accum_3
+	xorl	%eax,%eax
+	movl	%eax,FPU_accum_1	/* zero the extension */
+	movl	%eax,FPU_accum_0	/* zero the extension */
+
+	movl	SIGL(%esi),%eax	/* Get the current num */
+	movl	SIGH(%esi),%edx
+
+/*----------------------------------------------------------------------*/
+/* Initialization done.
+   Do the first 32 bits. */
+
+	movb	$0,FPU_ovfl_flag
+	cmpl	SIGH(%ebx),%edx	/* Test for imminent overflow */
+	jb	LLess_than_1
+	ja	LGreater_than_1
+
+	cmpl	SIGL(%ebx),%eax	/* ms words equal: compare the ls words */
+	jb	LLess_than_1
+
+LGreater_than_1:
+/* The dividend is greater or equal, would cause overflow */
+	setaeb	FPU_ovfl_flag		/* Keep a record */
+
+	subl	SIGL(%ebx),%eax
+	sbbl	SIGH(%ebx),%edx	/* Prevent the overflow */
+	movl	%eax,FPU_accum_2
+	movl	%edx,FPU_accum_3
+
+LLess_than_1:
+/* At this point, we have a dividend < divisor, with a record of
+   adjustment in FPU_ovfl_flag */
+
+	/* We will divide by a number which is too large */
+	movl	SIGH(%ebx),%ecx
+	addl	$1,%ecx		/* carry means SIGH was 0xffffffff, %ecx is now 0 */
+	jnc	LFirst_div_not_1
+
+	/* here we need to divide by 100000000h,
+	   i.e., no division at all.. */
+	mov	%edx,%eax
+	jmp	LFirst_div_done
+
+LFirst_div_not_1:
+	divl	%ecx		/* Divide the numerator by the augmented
+				   denom ms dw */
+
+LFirst_div_done:
+	movl	%eax,FPU_result_2	/* Put the result in the answer */
+
+	mull	SIGH(%ebx)	/* mul by the ms dw of the denom */
+
+	subl	%eax,FPU_accum_2	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_3
+
+	movl	FPU_result_2,%eax	/* Get the result back */
+	mull	SIGL(%ebx)	/* now mul the ls dw of the denom */
+
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+	sbbl	$0,FPU_accum_3
+	je	LDo_2nd_32_bits		/* Must check for non-zero result here */
+
+#ifdef PARANOID
+	jb	L_bugged_1	/* still the flags of the sbbl above */
+#endif /* PARANOID */
+
+	/* need to subtract another once of the denom */
+	incl	FPU_result_2	/* Correct the answer */
+
+	movl	SIGL(%ebx),%eax
+	movl	SIGH(%ebx),%edx
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+
+#ifdef PARANOID
+	sbbl	$0,FPU_accum_3
+	jne	L_bugged_1	/* Must check for non-zero result here */
+#endif /* PARANOID */
+
+/*----------------------------------------------------------------------*/
+/* Half of the main problem is done, there is just a reduced numerator
+   to handle now.
+   Work with the second 32 bits, FPU_accum_3 not used from now on */
+LDo_2nd_32_bits:
+	movl	FPU_accum_2,%edx	/* get the reduced num */
+	movl	FPU_accum_1,%eax
+
+	/* need to check for possible subsequent overflow */
+	cmpl	SIGH(%ebx),%edx
+	jb	LDo_2nd_div
+	ja	LPrevent_2nd_overflow
+
+	cmpl	SIGL(%ebx),%eax	/* ms words equal: compare the ls words */
+	jb	LDo_2nd_div
+
+LPrevent_2nd_overflow:
+/* The numerator is greater or equal, would cause overflow */
+	/* prevent overflow */
+	subl	SIGL(%ebx),%eax
+	sbbl	SIGH(%ebx),%edx
+	movl	%edx,FPU_accum_2
+	movl	%eax,FPU_accum_1
+
+	incl	FPU_result_2	/* Reflect the subtraction in the answer */
+
+#ifdef PARANOID
+	je	L_bugged_2	/* Can't bump the result to 1.0 */
+#endif /* PARANOID */
+
+LDo_2nd_div:
+	cmpl	$0,%ecx		/* augmented denom msw */
+	jnz	LSecond_div_not_1
+
+	/* %ecx == 0, we are dividing by 1.0 */
+	mov	%edx,%eax
+	jmp	LSecond_div_done
+
+LSecond_div_not_1:
+	divl	%ecx		/* Divide the numerator by the denom ms dw */
+
+LSecond_div_done:
+	movl	%eax,FPU_result_1	/* Put the result in the answer */
+
+	mull	SIGH(%ebx)	/* mul by the ms dw of the denom */
+
+	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+#endif /* PARANOID */
+
+	movl	FPU_result_1,%eax	/* Get the result back */
+	mull	SIGL(%ebx)	/* now mul the ls dw of the denom */
+
+	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_1	/* Subtract from the num local reg */
+	sbbl	$0,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+#endif /* PARANOID */
+
+	jz	LDo_3rd_32_bits	/* FPU_accum_2 is zero: no correction needed */
+
+#ifdef PARANOID
+	cmpl	$1,FPU_accum_2
+	jne	L_bugged_2
+#endif /* PARANOID */
+
+	/* need to subtract another once of the denom */
+	movl	SIGL(%ebx),%eax
+	movl	SIGH(%ebx),%edx
+	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
+	sbbl	%edx,FPU_accum_1
+	sbbl	$0,FPU_accum_2
+
+#ifdef PARANOID
+	jc	L_bugged_2
+	jne	L_bugged_2
+#endif /* PARANOID */
+
+	addl	$1,FPU_result_1	/* Correct the answer */
+	adcl	$0,FPU_result_2
+
+#ifdef PARANOID
+	jc	L_bugged_2	/* Must check for non-zero result here */
+#endif /* PARANOID */
+
+/*----------------------------------------------------------------------*/
+/* The division is essentially finished here, we just need to perform
+   tidying operations.
+   Deal with the 3rd 32 bits */
+LDo_3rd_32_bits:
+	movl	FPU_accum_1,%edx		/* get the reduced num */
+	movl	FPU_accum_0,%eax
+
+	/* need to check for possible subsequent overflow */
+	cmpl	SIGH(%ebx),%edx	/* denom */
+	jb	LRound_prep
+	ja	LPrevent_3rd_overflow
+
+	cmpl	SIGL(%ebx),%eax	/* denom */
+	jb	LRound_prep
+
+LPrevent_3rd_overflow:
+	/* prevent overflow */
+	subl	SIGL(%ebx),%eax
+	sbbl	SIGH(%ebx),%edx
+	movl	%edx,FPU_accum_1
+	movl	%eax,FPU_accum_0
+
+	addl	$1,FPU_result_1	/* Reflect the subtraction in the answer */
+	adcl	$0,FPU_result_2
+	jne	LRound_prep	/* FPU_result_2 is non-zero */
+	jnc	LRound_prep	/* zero, but not through a carry */
+
+	/* This is a tricky spot, there is an overflow of the answer */
+	movb	$255,FPU_ovfl_flag		/* Overflow -> 1.000 */
+
+LRound_prep:
+/*
+ * Prepare for rounding.
+ * To test for rounding, we just need to compare 2*accum with the
+ * denom.  %eax ends up as the rounding extension of the quotient.
+ */
+	movl	FPU_accum_0,%ecx
+	movl	FPU_accum_1,%edx
+	movl	%ecx,%eax
+	orl	%edx,%eax
+	jz	LRound_ovfl		/* The accumulator contains zero. */
+
+	/* Multiply by 2 */
+	clc
+	rcll	$1,%ecx
+	rcll	$1,%edx
+	jc	LRound_large		/* No need to compare, denom smaller */
+
+	subl	SIGL(%ebx),%ecx
+	sbbl	SIGH(%ebx),%edx
+	jnc	LRound_not_small
+
+	movl	$0x70000000,%eax	/* Denom was larger */
+	jmp	LRound_ovfl
+
+LRound_not_small:
+	orl	%ecx,%edx		/* sbbl's ZF covers %edx only: fold in %ecx */
+	jnz	LRound_large		/* 2*accum - denom != 0: more than 1/2 */
+	movl	$0x80000000,%eax	/* Remainder was exactly 1/2 denom */
+	jmp	LRound_ovfl
+
+LRound_large:
+	movl	$0xff000000,%eax	/* Denom was smaller */
+
+LRound_ovfl:
+/* We are now ready to deal with rounding, but first we must get
+   the bits properly aligned */
+	testb	$255,FPU_ovfl_flag	/* was the num > denom ? */
+	je	LRound_precision
+
+	incl	EXP(%edi)
+
+	/* shift the mantissa right one bit */
+	stc			/* Will set the ms bit */
+	rcrl	FPU_result_2
+	rcrl	FPU_result_1
+	rcrl	%eax
+
+/* Round the result as required */
+LRound_precision:
+	decl	EXP(%edi)	/* binary point between 1st & 2nd bits */
+
+	movl	%eax,%edx	/* extension */
+	movl	FPU_result_1,%ebx	/* ls word of the quotient */
+	movl	FPU_result_2,%eax	/* ms word of the quotient */
+	jmp	fpu_reg_round
+
+
+#ifdef PARANOID
+/* The logic is wrong if we got here */
+L_bugged:
+	pushl	EX_INTERNAL|0x202	/* argument for EXCEPTION */
+	call	EXCEPTION
+	pop	%ebx			/* pop the argument (into %ebx) */
+	jmp	L_exit
+
+L_bugged_1:
+	pushl	EX_INTERNAL|0x203
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+
+L_bugged_2:
+	pushl	EX_INTERNAL|0x204
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_exit
+
+L_exit:	/* reached only from the traps above */
+	movl	$1,%eax			/* failure status, as reg_u_sub's L_error_exit */
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
+#endif /* PARANOID */
diff --git a/arch/i386/math-emu/reg_u_mul.S b/arch/i386/math-emu/reg_u_mul.S
new file mode 100644
index 000000000..8250666bd
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_mul.S
@@ -0,0 +1,163 @@
+	.file	"reg_u_mul.S"
+/*---------------------------------------------------------------------------+
+ |  reg_u_mul.S                                                              |
+ |                                                                           |
+ | Core multiplication routine                                               |
+ |                                                                           |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ |   Basic multiplication routine.                                           |
+ |   Does not check the resulting exponent for overflow/underflow            |
+ |                                                                           |
+ |   reg_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw);         |
+ |                                                                           |
+ |   Internal working is at approx 128 bits.                                 |
+ |   Result is rounded to nearest 53 or 64 bits, using "nearest or even".    |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "fpu_asm.h"
+#include "control_w.h"
+
+
+
+#ifndef NON_REENTRANT_FPU
+/*  Local storage on the stack (two lowest words of the 128 bit product): */
+#define FPU_accum_0	-4(%ebp)	/* ls word */
+#define FPU_accum_1	-8(%ebp)
+
+#else /* NON_REENTRANT_FPU */
+/*  Local storage in a static area: */
+.data
+	.align 4,0
+FPU_accum_0:
+	.long	0
+FPU_accum_1:
+	.long	0
+#endif /* NON_REENTRANT_FPU */
+
+
+.text
+	.align 2,144	/* pad with 0x90 (nop) bytes */
+
+.globl _reg_u_mul
+_reg_u_mul:	/* entry: frame, locals, source pointers, operand checks */
+	pushl	%ebp
+	movl	%esp,%ebp
+#ifndef NON_REENTRANT_FPU
+	subl	$8,%esp		/* room for FPU_accum_0 and FPU_accum_1 */
+#endif /* NON_REENTRANT_FPU */
+
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi	/* first source operand */
+	movl	PARAM2,%edi	/* second source operand */
+
+#ifdef PARANOID
+	testl	$0x80000000,SIGH(%esi)	/* ms bit of each significand must be set */
+	jz	L_bugged
+	testl	$0x80000000,SIGH(%edi)
+	jz	L_bugged
+#endif /* PARANOID */
+
+#ifdef DENORM_OPERAND
+	movl	EXP(%esi),%eax
+	cmpl	EXP_UNDER,%eax
+	jg	xOp1_not_denorm	/* exponent above EXP_UNDER */
+
+	call	_denormal_operand
+	orl	%eax,%eax	/* non-zero return: give up via fpu_Arith_exit */
+	jnz	fpu_Arith_exit
+
+xOp1_not_denorm:
+	movl	EXP(%edi),%eax
+	cmpl	EXP_UNDER,%eax
+	jg	xOp2_not_denorm	/* exponent above EXP_UNDER */
+
+	call	_denormal_operand
+	orl	%eax,%eax	/* non-zero return: give up via fpu_Arith_exit */
+	jnz	fpu_Arith_exit
+
+xOp2_not_denorm:
+#endif /* DENORM_OPERAND */
+
+	xorl	%ecx,%ecx	/* %ecx:%ebx collect the upper 64 bits of the product */
+	xorl	%ebx,%ebx
+
+	movl	SIGL(%esi),%eax
+	mull	SIGL(%edi)	/* low * low */
+	movl	%eax,FPU_accum_0
+	movl	%edx,FPU_accum_1
+
+	movl	SIGL(%esi),%eax
+	mull	SIGH(%edi)	/* low * high */
+	addl	%eax,FPU_accum_1
+	adcl	%edx,%ebx
+/*	adcl	$0,%ecx		// overflow here is not possible */
+
+	movl	SIGH(%esi),%eax
+	mull	SIGL(%edi)	/* high * low */
+	addl	%eax,FPU_accum_1
+	adcl	%edx,%ebx
+	adcl	$0,%ecx
+
+	movl	SIGH(%esi),%eax
+	mull	SIGH(%edi)	/* high * high */
+	addl	%eax,%ebx
+	adcl	%edx,%ecx
+
+	movl	EXP(%esi),%eax	/* Compute the exponent */
+	addl	EXP(%edi),%eax
+	subl	EXP_BIAS-1,%eax
+
+/*  Have now finished with the sources */
+	movl	PARAM3,%edi	/* Point to the destination */
+	movl	%eax,EXP(%edi)
+
+/*  Now make sure that the result is normalized */
+	testl	$0x80000000,%ecx	/* ms bit of the 128 bit product */
+	jnz	LResult_Normalised
+
+	/* Normalize by shifting left one bit */
+	shll	$1,FPU_accum_0
+	rcll	$1,FPU_accum_1
+	rcll	$1,%ebx
+	rcll	$1,%ecx
+	decl	EXP(%edi)
+
+LResult_Normalised:
+	movl	FPU_accum_0,%eax
+	movl	FPU_accum_1,%edx	/* becomes the extension word */
+	orl	%eax,%eax
+	jz	L_extent_zero	/* lowest word is zero: nothing to record */
+
+	orl	$1,%edx		/* non-zero lowest word survives as bit 0 */
+
+L_extent_zero:
+	movl	%ecx,%eax	/* product is now in %eax:%ebx:%edx */
+	jmp	fpu_reg_round
+
+
+#ifdef PARANOID
+L_bugged:
+	pushl	EX_INTERNAL|0x205	/* argument for EXCEPTION */
+	call	EXCEPTION
+	pop	%ebx			/* pop the argument (into %ebx) */
+	movl	$1,%eax			/* failure status; falls through to L_exit */
+
+L_exit:
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
+#endif /* PARANOID */
+
diff --git a/arch/i386/math-emu/reg_u_sub.S b/arch/i386/math-emu/reg_u_sub.S
new file mode 100644
index 000000000..fbec17dfb
--- /dev/null
+++ b/arch/i386/math-emu/reg_u_sub.S
@@ -0,0 +1,292 @@
+	.file	"reg_u_sub.S"
+/*---------------------------------------------------------------------------+
+ |  reg_u_sub.S                                                              |
+ |                                                                           |
+ | Core floating point subtraction routine.                                  |
+ |                                                                           |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void reg_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ,             |
+ |                                                int control_w)             |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*
+ |    Kernel subtraction routine reg_u_sub(reg *arg1, reg *arg2, reg *answ).
+ |    Takes two valid reg f.p. numbers (TW_Valid), which are
+ |    treated as unsigned numbers,
+ |    and returns their difference as a TW_Valid or TW_Zero f.p.
+ |    number.
+ |    The first number (arg1) must be the larger.
+ |    The returned number is normalized.
+ |    Basic checks are performed if PARANOID is defined.
+ */
+
+#include "exception.h"
+#include "fpu_asm.h"
+#include "control_w.h"
+
+.text
+	.align 2,144	/* pad with 0x90 (nop) bytes */
+.globl _reg_u_sub
+_reg_u_sub:	/* entry: frame, source pointers, operand checks */
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi	/* source 1 */
+	movl	PARAM2,%edi	/* source 2 */
+
+#ifdef DENORM_OPERAND
+	cmpl	EXP_UNDER,EXP(%esi)
+	jg	xOp1_not_denorm	/* exponent above EXP_UNDER */
+
+	call	_denormal_operand
+	orl	%eax,%eax	/* non-zero return: give up via fpu_Arith_exit */
+	jnz	fpu_Arith_exit
+
+xOp1_not_denorm:
+	cmpl	EXP_UNDER,EXP(%edi)
+	jg	xOp2_not_denorm	/* exponent above EXP_UNDER */
+
+	call	_denormal_operand
+	orl	%eax,%eax	/* non-zero return: give up via fpu_Arith_exit */
+	jnz	fpu_Arith_exit
+
+xOp2_not_denorm:
+#endif /* DENORM_OPERAND */
+
+	movl	EXP(%esi),%ecx
+	subl	EXP(%edi),%ecx	/* exp1 - exp2 */
+
+#ifdef PARANOID
+	/* source 2 is always smaller than source 1 */
+	js	L_bugged_1
+
+	testl	$0x80000000,SIGH(%edi)	/* The args are assumed to be normalized */
+	je	L_bugged_2
+
+	testl	$0x80000000,SIGH(%esi)
+	je	L_bugged_2
+#endif /* PARANOID */
+
+/*--------------------------------------+
+ |	Form a register holding the     |
+ |	smaller number                  |
+ +--------------------------------------*/
+	movl	SIGH(%edi),%eax	/* register ms word */
+	movl	SIGL(%edi),%ebx	/* register ls word */
+
+	movl	PARAM3,%edi	/* destination */
+	movl	EXP(%esi),%edx
+	movl	%edx,EXP(%edi)	/* Copy exponent to destination */
+/*	movb	SIGN(%esi),%dl
+	movb	%dl,SIGN(%edi) */	/* Copy the sign from the first arg */
+
+	xorl	%edx,%edx	/* register extension */
+
+/*--------------------------------------+
+ |	Shift the temporary register	|
+ |      right the required number of	|
+ |	places (%eax:%ebx:%edx by %ecx)	|
+ +--------------------------------------*/
+L_shift_r:
+	cmpl	$32,%ecx		/* shrd only works for 0..31 bits */
+	jnc	L_more_than_31
+
+/* less than 32 bits */
+	shrd	%cl,%ebx,%edx	/* bits leaving %ebx enter the extension */
+	shrd	%cl,%eax,%ebx
+	shr	%cl,%eax
+	jmp	L_shift_done
+
+L_more_than_31:
+	cmpl	$64,%ecx
+	jnc	L_more_than_63
+
+	subb	$32,%cl		/* what is left after a whole-word move */
+	jz	L_exactly_32
+
+	shrd	%cl,%eax,%edx	/* extension receives the bits leaving %eax */
+	shr	%cl,%eax
+	orl	%ebx,%ebx
+	jz	L_more_31_no_low	/* none of the lowest bits is set */
+
+	orl	$1,%edx			/* record the fact in the extension */
+
+L_more_31_no_low:
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	jmp	L_shift_done
+
+L_exactly_32:
+	movl	%ebx,%edx
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	jmp	L_shift_done
+
+L_more_than_63:
+	cmpl	$65,%ecx	/* full 32 bit compare, like the two tests above */
+	jnc	L_more_than_64
+
+	/* Shift right by 64 bits */
+	movl	%eax,%edx
+	orl	%ebx,%ebx
+	jz	L_more_63_no_low
+
+	orl	$1,%edx		/* non-zero ls word survives as bit 0 only */
+	jmp	L_more_63_no_low
+
+L_more_than_64:
+	jne	L_more_than_65	/* ZF still comes from the compare with 65 */
+
+	/* Shift right by 65 bits */
+	/* Carry is clear if we get here */
+	movl	%eax,%edx
+	rcrl	%edx		/* one more bit; the lost bit lands in the carry */
+	jnc	L_shift_65_nc
+
+	orl	$1,%edx
+	jmp	L_more_63_no_low
+
+L_shift_65_nc:
+	orl	%ebx,%ebx
+	jz	L_more_63_no_low
+
+	orl	$1,%edx
+	jmp	L_more_63_no_low
+
+L_more_than_65:
+	movl	$1,%edx		/* The shifted nr always at least one '1' */
+
+L_more_63_no_low:
+	xorl	%ebx,%ebx	/* both significand words are shifted out */
+	xorl	%eax,%eax
+
+L_shift_done:
+L_subtr:
+/*------------------------------+
+ |	Do the subtraction	|
+ +------------------------------*/
+	xorl	%ecx,%ecx
+	subl	%edx,%ecx	/* extension: 0 - shifted extension */
+	movl	%ecx,%edx	/* movl leaves the borrow in the carry flag */
+	movl	SIGL(%esi),%ecx
+	sbbl	%ebx,%ecx	/* ls word of source 1 minus shifted ls word */
+	movl	%ecx,%ebx
+	movl	SIGH(%esi),%ecx
+	sbbl	%eax,%ecx	/* ms word of source 1 minus shifted ms word */
+	movl	%ecx,%eax	/* difference is now in %eax:%ebx:%edx */
+
+#ifdef PARANOID
+	/* We can never get a borrow */
+	jc	L_bugged
+#endif /* PARANOID */
+
+/*--------------------------------------+
+ |	Normalize the result		|
+ +--------------------------------------*/
+	testl	$0x80000000,%eax
+	jnz	L_round		/* no shifting needed */
+
+	orl	%eax,%eax
+	jnz	L_shift_1	/* shift left 1 - 31 bits */
+
+	orl	%ebx,%ebx
+	jnz	L_shift_32	/* shift left 32 - 63 bits */
+
+/*
+ *	 A rare case, the only one which is non-zero if we got here
+ *         is:           1000000 .... 0000
+ *                      -0111111 .... 1111 1
+ *                       --------------------
+ *                       0000000 .... 0000 1
+ */
+
+	cmpl	$0x80000000,%edx
+	jnz	L_must_be_zero
+
+	/* Shift left 64 bits */
+	subl	$64,EXP(%edi)
+	xchg	%edx,%eax	/* extension becomes the ms word; %edx gets 0 */
+	jmp	fpu_reg_round
+
+L_must_be_zero:
+#ifdef PARANOID
+	orl	%edx,%edx
+	jnz	L_bugged_3
+#endif /* PARANOID */
+
+	/* The result is zero */
+	movb	TW_Zero,TAG(%edi)
+	movl	$0,EXP(%edi)		/* exponent */
+	movl	$0,SIGL(%edi)
+	movl	$0,SIGH(%edi)
+	jmp	L_exit		/* %eax contains zero */
+
+L_shift_32:
+	movl	%ebx,%eax	/* move everything up by one word */
+	movl	%edx,%ebx
+	movl	$0,%edx
+	subl	$32,EXP(%edi)	/* Can get underflow here */
+
+/* We need to shift left by 1 - 31 bits */
+L_shift_1:
+	bsrl	%eax,%ecx	/* get the required shift in %ecx */
+	subl	$31,%ecx
+	negl	%ecx		/* %ecx = 31 - index of the highest set bit */
+	shld	%cl,%ebx,%eax
+	shld	%cl,%edx,%ebx
+	shl	%cl,%edx
+	subl	%ecx,EXP(%edi)	/* Can get underflow here */
+
+L_round:
+	jmp	fpu_reg_round	/* Round the result */
+
+
+#ifdef PARANOID	/* internal-consistency traps: each reports EX_INTERNAL|code */
+L_bugged_1:
+	pushl	EX_INTERNAL|0x206	/* argument for EXCEPTION */
+	call	EXCEPTION
+	pop	%ebx			/* pop the argument (into %ebx) */
+	jmp	L_error_exit
+
+L_bugged_2:
+	pushl	EX_INTERNAL|0x209
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_error_exit
+
+L_bugged_3:
+	pushl	EX_INTERNAL|0x210
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_error_exit
+
+L_bugged_4:	/* NOTE(review): no branch in this file targets this label */
+	pushl	EX_INTERNAL|0x211
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_error_exit
+
+L_bugged:
+	pushl	EX_INTERNAL|0x212
+	call	EXCEPTION
+	pop	%ebx
+	jmp	L_error_exit
+#endif /* PARANOID */
+
+
+L_error_exit:
+	movl	$1,%eax		/* failure status; the zero-result path leaves 0 */
+L_exit:
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	leave
+	ret
diff --git a/arch/i386/math-emu/round_Xsig.S b/arch/i386/math-emu/round_Xsig.S
new file mode 100644
index 000000000..163755878
--- /dev/null
+++ b/arch/i386/math-emu/round_Xsig.S
@@ -0,0 +1,148 @@
+/*---------------------------------------------------------------------------+
+ |  round_Xsig.S                                                             |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | Normalize and round a 12 byte quantity.                                   |
+ | Call from C as:                                                           |
+ |   int round_Xsig(Xsig *n)                                                 |
+ |                                                                           |
+ | Normalize a 12 byte quantity.                                             |
+ | Call from C as:                                                           |
+ |   int norm_Xsig(Xsig *n)                                                  |
+ |                                                                           |
+ | Each function returns the size of the shift (nr of bits).                 |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+	.file	"round_Xsig.S"
+
+#include "fpu_asm.h"
+
+
+.text
+
+	.align 2,144
+.globl _round_Xsig
+
+_round_Xsig:	/* normalize the 12 byte value at PARAM1, then round on its ls word */
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%ebx		/* Reserve some space */
+	pushl	%ebx		/* the real save of %ebx */
+	pushl	%esi
+
+	movl	PARAM1,%esi
+
+	movl	8(%esi),%edx	/* ms word */
+	movl	4(%esi),%ebx	/* middle word */
+	movl	(%esi),%eax	/* ls word */
+
+	movl	$0,-4(%ebp)	/* shift tally (returned in %eax) lives in the reserved slot */
+
+	orl	%edx,%edx	/* ms bits */
+	js	L_round		/* Already normalized */
+	jnz	L_shift_1	/* Shift left 1 - 31 bits */
+
+	movl	%ebx,%edx	/* ms word is zero: move up by one word */
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	movl	$-32,-4(%ebp)	/* NOTE(review): %edx may still be 0 here; bsrl is then undefined */
+
+/* We need to shift left by 1 - 31 bits */
+L_shift_1:
+	bsrl	%edx,%ecx	/* get the required shift in %ecx */
+	subl	$31,%ecx
+	negl	%ecx		/* %ecx = 31 - index of the highest set bit */
+	subl	%ecx,-4(%ebp)	/* left shifts are tallied as negative */
+	shld	%cl,%ebx,%edx
+	shld	%cl,%eax,%ebx
+	shl	%cl,%eax
+
+L_round:
+	testl	$0x80000000,%eax	/* ms bit of the ls word decides the rounding */
+	jz	L_exit
+
+	addl	$1,%ebx		/* round up the upper 64 bits */
+	adcl	$0,%edx
+	jnz	L_exit		/* ms word did not become zero */
+
+	movl	$0x80000000,%edx	/* ms word became zero: set its top bit instead */
+	incl	-4(%ebp)	/* and add one to the tally */
+
+L_exit:
+	movl	%edx,8(%esi)	/* store all three words back */
+	movl	%ebx,4(%esi)
+	movl	%eax,(%esi)
+
+	movl	-4(%ebp),%eax	/* return the shift tally */
+
+	popl	%esi
+	popl	%ebx
+	leave			/* also discards the reserved slot */
+	ret
+
+
+
+
+	.align 2,144
+.globl _norm_Xsig
+
+_norm_Xsig:	/* normalize the 12 byte value at PARAM1 without rounding */
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%ebx		/* Reserve some space */
+	pushl	%ebx		/* the real save of %ebx */
+	pushl	%esi
+
+	movl	PARAM1,%esi
+
+	movl	8(%esi),%edx	/* ms word */
+	movl	4(%esi),%ebx	/* middle word */
+	movl	(%esi),%eax	/* ls word */
+
+	movl	$0,-4(%ebp)	/* shift tally (returned in %eax) lives in the reserved slot */
+
+	orl	%edx,%edx	/* ms bits */
+	js	L_n_exit		/* Already normalized */
+	jnz	L_n_shift_1	/* Shift left 1 - 31 bits */
+
+	movl	%ebx,%edx	/* ms word is zero: move up by one word */
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	movl	$-32,-4(%ebp)
+
+	orl	%edx,%edx	/* ms bits */
+	js	L_n_exit	/* Normalized now */
+	jnz	L_n_shift_1	/* Shift left 1 - 31 bits */
+
+	movl	%ebx,%edx	/* still zero: move up by a second word */
+	movl	%eax,%ebx
+	xorl	%eax,%eax
+	addl	$-32,-4(%ebp)
+	jmp	L_n_exit	/* Might not be normalized,
+	                           but shift no more. */
+
+/* We need to shift left by 1 - 31 bits */
+L_n_shift_1:
+	bsrl	%edx,%ecx	/* get the required shift in %ecx */
+	subl	$31,%ecx
+	negl	%ecx		/* %ecx = 31 - index of the highest set bit */
+	subl	%ecx,-4(%ebp)	/* left shifts are tallied as negative */
+	shld	%cl,%ebx,%edx
+	shld	%cl,%eax,%ebx
+	shl	%cl,%eax
+
+L_n_exit:
+	movl	%edx,8(%esi)	/* store all three words back */
+	movl	%ebx,4(%esi)
+	movl	%eax,(%esi)
+
+	movl	-4(%ebp),%eax	/* return the shift tally */
+
+	popl	%esi
+	popl	%ebx
+	leave			/* also discards the reserved slot */
+	ret
+
diff --git a/arch/i386/math-emu/shr_Xsig.S b/arch/i386/math-emu/shr_Xsig.S
new file mode 100644
index 000000000..d6724a204
--- /dev/null
+++ b/arch/i386/math-emu/shr_Xsig.S
@@ -0,0 +1,90 @@
+	.file	"shr_Xsig.S"
+/*---------------------------------------------------------------------------+
+ |  shr_Xsig.S                                                               |
+ |                                                                           |
+ | 12 byte right shift function                                              |
+ |                                                                           |
+ | Copyright (C) 1992,1994                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void shr_Xsig(Xsig *arg, unsigned nr)                                   |
+ |                                                                           |
+ |   Extended shift right function.                                          |
+ |   Fastest for small shifts.                                               |
+ |   Shifts the 12 byte quantity pointed to by the first arg (arg)           |
+ |   right by the number of bits specified by the second arg (nr).           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_asm.h"
+
+.text
+	.align 2,144
+
+	.globl	_shr_Xsig
+_shr_Xsig:
+	push	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	movl	PARAM1,%esi	/* arg */
+	movl	PARAM2,%ecx	/* nr */
+	cmpl	$32,%ecx	/* shrd can only handle counts of 0..31 */
+	jae	L_Xsig_ge_32
+
+/* nr = 0..31: shift all three words, the common and fastest case */
+	pushl	%ebx
+	movl	8(%esi),%edx	/* msl */
+	movl	4(%esi),%ebx	/* midl */
+	movl	(%esi),%eax	/* lsl */
+	shrd	%cl,%ebx,%eax
+	shrd	%cl,%edx,%ebx
+	shr	%cl,%edx
+	movl	%edx,8(%esi)
+	movl	%ebx,4(%esi)
+	movl	%eax,(%esi)
+	popl	%ebx
+
+/* Common way out for every case */
+L_Xsig_done:
+	popl	%esi
+	leave
+	ret
+
+L_Xsig_ge_32:
+	cmpl	$64,%ecx
+	jae	L_Xsig_ge_64
+
+/* nr = 32..63: the ls word drops out, then shift by nr - 32 */
+	subb	$32,%cl
+	movl	8(%esi),%edx	/* msl */
+	movl	4(%esi),%eax	/* midl */
+	shrd	%cl,%edx,%eax
+	shr	%cl,%edx
+	movl	$0,8(%esi)
+	movl	%edx,4(%esi)
+	movl	%eax,(%esi)
+	jmp	L_Xsig_done
+
+L_Xsig_ge_64:
+	cmpl	$96,%ecx
+	jae	L_Xsig_ge_96
+
+/* nr = 64..95: only bits of the ms word survive */
+	subb	$64,%cl
+	movl	8(%esi),%eax	/* msl */
+	shr	%cl,%eax
+	xorl	%edx,%edx
+	movl	%edx,8(%esi)
+	movl	%edx,4(%esi)
+	movl	%eax,(%esi)
+	jmp	L_Xsig_done
+
+L_Xsig_ge_96:
+/* nr >= 96: every bit is shifted out */
+	xorl	%eax,%eax
+	movl	%eax,8(%esi)
+	movl	%eax,4(%esi)
+	movl	%eax,(%esi)
+	jmp	L_Xsig_done
diff --git a/arch/i386/math-emu/status_w.h b/arch/i386/math-emu/status_w.h
new file mode 100644
index 000000000..96607d0e1
--- /dev/null
+++ b/arch/i386/math-emu/status_w.h
@@ -0,0 +1,65 @@
+/*---------------------------------------------------------------------------+
+ |  status_w.h                                                               |
+ |                                                                           |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#ifndef _STATUS_H_
+#define _STATUS_H_
+
+#include "fpu_emu.h"    /* for definition of PECULIAR_486 */
+
+/*
+ * Const__() lets one definition serve both C and assembler sources:
+ * when __ASSEMBLER__ is defined a '$' is pasted in front of the value,
+ * otherwise the value is used unchanged.
+ */
+#ifdef __ASSEMBLER__
+#define	Const__(x)	$##x
+#else
+#define	Const__(x)	x
+#endif
+
+/* Bits and fields of the status word */
+#define SW_Backward    	Const__(0x8000)	/* backward compatibility */
+#define SW_C3		Const__(0x4000)	/* condition bit 3 */
+#define SW_Top		Const__(0x3800)	/* top of stack */
+#define SW_Top_Shift 	Const__(11)	/* shift for top of stack bits */
+#define SW_C2		Const__(0x0400)	/* condition bit 2 */
+#define SW_C1		Const__(0x0200)	/* condition bit 1 */
+#define SW_C0		Const__(0x0100)	/* condition bit 0 */
+#define SW_Summary     	Const__(0x0080)	/* exception summary */
+#define SW_Stack_Fault	Const__(0x0040)	/* stack fault */
+#define SW_Precision   	Const__(0x0020)	/* loss of precision */
+#define SW_Underflow   	Const__(0x0010)	/* underflow */
+#define SW_Overflow    	Const__(0x0008)	/* overflow */
+#define SW_Zero_Div    	Const__(0x0004)	/* divide by zero */
+#define SW_Denorm_Op   	Const__(0x0002)	/* denormalized operand */
+#define SW_Invalid     	Const__(0x0001)	/* invalid operation */
+
+#define SW_Exc_Mask     Const__(0x27f)  /* Status word exception bit mask */
+
+/* Everything below is C only and is hidden from assembler sources. */
+#ifndef __ASSEMBLER__
+
+/* Comparison result codes (1..4) and the flag bits that may accompany them */
+#define COMP_A_gt_B	1
+#define COMP_A_eq_B	2
+#define COMP_A_lt_B	3
+#define COMP_No_Comp	4
+#define COMP_Denormal   0x20
+#define COMP_NaN	0x40
+#define COMP_SNaN	0x80
+
+/*
+ * status_word(): the low 16 bits of partial_status with the
+ * top-of-stack field replaced by the current value of `top'.
+ */
+#define status_word() \
+  ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top))
+
+/*
+ * setcc(cc): replace the four condition bits C0..C3 of partial_status
+ * by the corresponding bits of cc; all other bits of cc are ignored.
+ */
+#define setcc(cc) ({ \
+  partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); \
+  partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); })
+
+#ifdef PECULIAR_486
+   /* Default, this conveys no information, but an 80486 does it. */
+   /* Clear the SW_C1 bit, "other bits undefined". */
+#  define clear_C1()  { partial_status &= ~SW_C1; }
+# else
+#  define clear_C1()
+#endif /* PECULIAR_486 */
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _STATUS_H_ */
diff --git a/arch/i386/math-emu/version.h b/arch/i386/math-emu/version.h
new file mode 100644
index 000000000..4c75a4792
--- /dev/null
+++ b/arch/i386/math-emu/version.h
@@ -0,0 +1,12 @@
+/*---------------------------------------------------------------------------+
+ |  version.h                                                                |
+ |                                                                           |
+ |                                                                           |
+ | Copyright (C) 1992,1993,1994                                              |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/* Name and version string identifying this release of the emulator. */
+#define FPU_VERSION "wm-FPU-emu version 1.20"
diff --git a/arch/i386/math-emu/wm_shrx.S b/arch/i386/math-emu/wm_shrx.S
new file mode 100644
index 000000000..bef0e1963
--- /dev/null
+++ b/arch/i386/math-emu/wm_shrx.S
@@ -0,0 +1,208 @@
+	.file	"wm_shrx.S"
+/*---------------------------------------------------------------------------+
+ |  wm_shrx.S                                                                |
+ |                                                                           |
+ | 64 bit right shift functions                                              |
+ |                                                                           |
+ | Copyright (C) 1992    W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   unsigned shrx(void *arg1, unsigned arg2)                                |
+ | and                                                                       |
+ |   unsigned shrxs(void *arg1, unsigned arg2)                               |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "fpu_asm.h"
+
+.text
+	.align 2,144
+
+/*---------------------------------------------------------------------------+
+ |   unsigned shrx(void *arg1, unsigned arg2)                                |
+ |                                                                           |
+ |   Extended shift right function.                                          |
+ |   Fastest for small shifts.                                               |
+ |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
+ |   right by the number of bits specified by the second arg (arg2).         |
+ |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
+ |                [  64 bit arg ][ eax ]                                     |
+ |            shift right  --------->                                        |
+ |   The eax register is initialized to 0 before the shifting.               |
+ |   Results returned in the 64 bit arg and eax.                             |
+ +---------------------------------------------------------------------------*/
+
+	.globl	_shrx
+
+_shrx:
+	push	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	movl	PARAM1,%esi	/* arg1 */
+	movl	PARAM2,%ecx	/* arg2 */
+	cmpl	$32,%ecx	/* shrd can only handle counts of 0..31 */
+	jae	L_shrx_ge_32
+
+/* arg2 = 0..31: the common and fastest case */
+	pushl	%ebx
+	xorl	%eax,%eax	/* the extension word starts out empty */
+	movl	4(%esi),%edx	/* msl */
+	movl	(%esi),%ebx	/* lsl */
+	shrd	%cl,%ebx,%eax
+	shrd	%cl,%edx,%ebx
+	shr	%cl,%edx
+	movl	%edx,4(%esi)
+	movl	%ebx,(%esi)
+	popl	%ebx
+
+/* Common way out for every case; the extension is returned in %eax */
+L_shrx_done:
+	popl	%esi
+	leave
+	ret
+
+L_shrx_ge_32:
+	cmpl	$64,%ecx
+	jae	L_shrx_ge_64
+
+/* arg2 = 32..63: the old lsl lands in the extension, shift by arg2 - 32 */
+	subb	$32,%cl
+	movl	4(%esi),%edx	/* msl */
+	movl	(%esi),%eax	/* lsl */
+	shrd	%cl,%edx,%eax
+	shr	%cl,%edx
+	movl	$0,4(%esi)
+	movl	%edx,(%esi)
+	jmp	L_shrx_done
+
+L_shrx_ge_64:
+	cmpl	$96,%ecx
+	jae	L_shrx_ge_96
+
+/* arg2 = 64..95: only the extension can still hold bits of the old msl */
+	subb	$64,%cl
+	movl	4(%esi),%eax	/* msl */
+	shr	%cl,%eax
+	xorl	%edx,%edx
+	movl	%edx,4(%esi)
+	movl	%edx,(%esi)
+	jmp	L_shrx_done
+
+L_shrx_ge_96:
+/* arg2 >= 96: everything, extension included, becomes zero */
+	xorl	%eax,%eax
+	movl	%eax,4(%esi)
+	movl	%eax,(%esi)
+	jmp	L_shrx_done
+
+
+/*---------------------------------------------------------------------------+
+ |   unsigned shrxs(void *arg1, unsigned arg2)                               |
+ |                                                                           |
+ |   Extended shift right function (optimized for small floating point       |
+ |   integers).                                                              |
+ |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
+ |   right by the number of bits specified by the second arg (arg2).         |
+ |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
+ |                [  64 bit arg ][ eax ]                                     |
+ |            shift right  --------->                                        |
+ |   The eax register is initialized to 0 before the shifting.               |
+ |   The lower 8 bits of eax are lost and replaced by a flag which is        |
+ |   set (to 0x01) if any bit, apart from the first one, is set in the       |
+ |   part which has been shifted out of the arg.                             |
+ |   Results returned in the 64 bit arg and eax.                             |
+ +---------------------------------------------------------------------------*/
+	.globl	_shrxs
+_shrxs:
+	push	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	pushl	%ebx
+	movl	PARAM2,%ecx
+	movl	PARAM1,%esi
+	cmpl	$64,%ecx	/* 64 or more is handled further down */
+	jnc	Ls_more_than_63
+
+	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
+	jc	Ls_less_than_32
+
+/* We got here without jumps by assuming that the most common requirement
+   is for small integers */
+/* Shift by [32..63] bits */
+	subb	$32,%cl		/* a whole word goes, %cl = remaining 0..31 */
+	movl	(%esi),%eax	/* lsl */
+	movl	4(%esi),%edx	/* msl */
+	xorl	%ebx,%ebx
+	shrd	%cl,%eax,%ebx	/* %ebx catches the bits leaving the lsl */
+	shrd	%cl,%edx,%eax	/* %eax becomes the extension word */
+	shr	%cl,%edx	/* %edx becomes the new lsl */
+	orl	%ebx,%ebx		/* test these 32 bits */
+	setne	%bl
+	test	$0x7fffffff,%eax	/* and 31 bits here */
+	setne	%bh
+	orw	%bx,%bx			/* Any of the 63 bit set ? */
+	setne	%al		/* low byte of %eax becomes the 0/1 flag */
+	movl	%edx,(%esi)
+	movl	$0,4(%esi)
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
+
+/* Shift by [0..31] bits */
+Ls_less_than_32:
+	movl	(%esi),%ebx	/* lsl */
+	movl	4(%esi),%edx	/* msl */
+	xorl	%eax,%eax	/* extension */
+	shrd	%cl,%ebx,%eax
+	shrd	%cl,%edx,%ebx
+	shr	%cl,%edx
+	test	$0x7fffffff,%eax	/* only need to look at eax here */
+	setne	%al		/* low byte of %eax becomes the 0/1 flag */
+	movl	%ebx,(%esi)
+	movl	%edx,4(%esi)
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
+
+/* Shift by [64..95] bits */
+Ls_more_than_63:
+	cmpl	$96,%ecx
+	jnc	Ls_more_than_95
+
+	subb	$64,%cl		/* two whole words go, %cl = remaining 0..31 */
+	movl	(%esi),%ebx	/* lsl */
+	movl	4(%esi),%eax	/* msl */
+	xorl	%edx,%edx	/* extension */
+	shrd	%cl,%ebx,%edx	/* %edx catches the bits leaving the lsl */
+	shrd	%cl,%eax,%ebx	/* %ebx: what lies one word below the result */
+	shr	%cl,%eax	/* %eax: what is left of the msl, the result */
+	orl	%ebx,%edx	/* anything at all below the result word ? */
+	setne	%bl
+	test	$0x7fffffff,%eax	/* only need to look at eax here */
+	setne	%bh
+	orw	%bx,%bx
+	setne	%al		/* low byte of %eax becomes the 0/1 flag */
+	xorl	%edx,%edx
+	movl	%edx,(%esi)	/* set to zero */
+	movl	%edx,4(%esi)	/* set to zero */
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
+
+Ls_more_than_95:
+/* Shift by [96..inf) bits */
+	xorl	%eax,%eax	/* result is zero apart from the flag */
+	movl	(%esi),%ebx
+	orl	4(%esi),%ebx	/* was any bit of the arg set ? */
+	setne	%al
+	xorl	%ebx,%ebx
+	movl	%ebx,(%esi)
+	movl	%ebx,4(%esi)
+	popl	%ebx
+	popl	%esi
+	leave
+	ret
diff --git a/arch/i386/math-emu/wm_sqrt.S b/arch/i386/math-emu/wm_sqrt.S
new file mode 100644
index 000000000..4e028cb80
--- /dev/null
+++ b/arch/i386/math-emu/wm_sqrt.S
@@ -0,0 +1,474 @@
+	.file	"wm_sqrt.S"
+/*---------------------------------------------------------------------------+
+ |  wm_sqrt.S                                                                |
+ |                                                                           |
+ | Fixed point arithmetic square root evaluation.                            |
+ |                                                                           |
+ | Copyright (C) 1992,1993                                                   |
+ |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
+ |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
+ |                                                                           |
+ | Call from C as:                                                           |
+ |   void wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
+ |                                                                           |
+ +---------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------+
+ |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
+ |    returns the square root of n in n.                                     |
+ |                                                                           |
+ |  Use Newton's method to compute the square root of a number, which must   |
+ |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
+ |  Does not check the sign or tag of the argument.                          |
+ |  Sets the exponent, but not the sign or tag of the result.                |
+ |                                                                           |
+ |  The guess is kept in %esi:%edi                                           |
+ +---------------------------------------------------------------------------*/
+
+#include "exception.h"
+#include "fpu_asm.h"
+
+
+/*
+ * Working storage for wm_sqrt: four accumulator words and the three
+ * words of the de-normalised argument.  By default these are slots in
+ * the stack frame (28 bytes below %ebp); with NON_REENTRANT_FPU defined
+ * they are static words in .data instead.
+ * FPU_accum_0 is defined but not referenced in this file.
+ */
+#ifndef NON_REENTRANT_FPU
+/*	Local storage on the stack: */
+#define FPU_accum_3	-4(%ebp)	/* ms word */
+#define FPU_accum_2	-8(%ebp)
+#define FPU_accum_1	-12(%ebp)
+#define FPU_accum_0	-16(%ebp)
+
+/*
+ * The de-normalised argument:
+ *                  sq_2                  sq_1              sq_0
+ *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
+ *           ^ binary point here
+ */
+#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
+#define FPU_fsqrt_arg_1	-24(%ebp)
+#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */
+
+#else
+/*	Local storage in a static area: */
+.data
+	.align 4,0
+FPU_accum_3:
+	.long	0		/* ms word */
+FPU_accum_2:
+	.long	0
+FPU_accum_1:
+	.long	0
+FPU_accum_0:
+	.long	0
+
+/* The de-normalised argument:
+                    sq_2                  sq_1              sq_0
+          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
+             ^ binary point here
+ */
+FPU_fsqrt_arg_2:
+	.long	0		/* ms word */
+FPU_fsqrt_arg_1:
+	.long	0
+FPU_fsqrt_arg_0:
+	.long	0		/* ls word, at most the ms bit is set */
+#endif /* NON_REENTRANT_FPU */
+
+
+.text
+	.align 2,144
+
+.globl _wm_sqrt
+/*
+ * Entry: PARAM1 = n, PARAM2 = control word (see the prototype in the
+ * file header).  The routine does not return by itself; every path
+ * ends in sqrt_round_result, which jumps to fpu_reg_round_sqrt.
+ */
+_wm_sqrt:
+	pushl	%ebp
+	movl	%esp,%ebp
+#ifndef NON_REENTRANT_FPU
+	subl	$28,%esp	/* room for the seven stack-based work words */
+#endif /* NON_REENTRANT_FPU */
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	PARAM1,%esi
+
+	/* The significand as a 96 bit value in %eax:%ecx:%edx */
+	movl	SIGH(%esi),%eax
+	movl	SIGL(%esi),%ecx
+	xorl	%edx,%edx
+
+/* We use a rough linear estimate for the first guess.. */
+
+	cmpl	EXP_BIAS,EXP(%esi)
+	jnz	sqrt_arg_ge_2
+
+	/* Exponent equals the bias: move the significand down one bit,
+	   the bit leaving %ecx ends up as the ms bit of %edx. */
+	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
+	rcrl	$1,%ecx
+	rcrl	$1,%edx
+
+sqrt_arg_ge_2:
+/* From here on, n is never accessed directly again until it is
+   replaced by the answer. */
+
+	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
+	movl	%ecx,FPU_fsqrt_arg_1
+	movl	%edx,FPU_fsqrt_arg_0
+
+/* Make a linear first estimate */
+	shrl	$1,%eax
+	addl	$0x40000000,%eax
+	movl	$0xaaaaaaaa,%ecx
+	mull	%ecx			/* high half of the product in %edx */
+	shll	%edx			/* max result was 7fff... */
+	testl	$0x80000000,%edx	/* but min was 3fff... */
+	jnz	sqrt_prelim_no_adjust
+
+	/* Bit 31 is not set: use 0x80000000 as the estimate instead */
+	movl	$0x80000000,%edx	/* round up */
+
+sqrt_prelim_no_adjust:
+	movl	%edx,%esi	/* Our first guess */
+
+/* We have now computed (approx)   (2 + x) / 3, which forms the basis
+   for a few iterations of Newton's method */
+
+	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */
+
+/*
+ * From our initial estimate, three iterations are enough to get us
+ * to 30 bits or so. This will then allow two iterations at better
+ * precision to complete the process.
+ */
+
+/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
+/*
+ * Each step divides %edx:%eax by the guess.  Only %edx is loaded; the
+ * low half of the dividend is whatever %eax still holds (the low word
+ * of the product above on the first step, the previous quotient on the
+ * later ones).
+ */
+	shrl	%ecx		/* Doing this first will prevent a divide */
+				/* overflow later. */
+
+	/* First iteration */
+	movl	%ecx,%edx	/* msw of the arg / 2 */
+	divl	%esi		/* current estimate */
+	shrl	%esi		/* divide by 2 */
+	addl	%eax,%esi	/* the new estimate */
+
+	/* Second iteration */
+	movl	%ecx,%edx
+	divl	%esi
+	shrl	%esi
+	addl	%eax,%esi
+
+	/* Third iteration */
+	movl	%ecx,%edx
+	divl	%esi
+	shrl	%esi
+	addl	%eax,%esi
+
+/*
+ * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
+ * we improve it to 60 bits or so.
+ *
+ * The strategy from now on is to compute new estimates from
+ *      guess := guess + (n - guess^2) / (2 * guess)
+ */
+
+/* First, find the square of the guess */
+	movl	%esi,%eax
+	mull	%esi
+/* guess^2 now in %edx:%eax */
+
+	/* %edx:%eax = guess^2 - (upper two words of n) */
+	movl	FPU_fsqrt_arg_1,%ecx
+	subl	%ecx,%eax
+	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
+	sbbl	%ecx,%edx
+	jnc	sqrt_stage_2_positive
+
+/* Subtraction gives a negative result,
+   negate the result before division. */
+	notl	%edx
+	notl	%eax
+	addl	$1,%eax
+	adcl	$0,%edx
+
+	/* Two divides give a two word quotient in %ecx:%eax; the second
+	   one works on the remainder of the first. */
+	divl	%esi
+	movl	%eax,%ecx
+
+	movl	%edx,%eax
+	divl	%esi
+	jmp	sqrt_stage_2_finish
+
+sqrt_stage_2_positive:
+	/* guess^2 >= n: same two divides, then negate the quotient
+	   because the correction has to be subtracted. */
+	divl	%esi
+	movl	%eax,%ecx
+
+	movl	%edx,%eax
+	divl	%esi
+
+	notl	%ecx
+	notl	%eax
+	addl	$1,%eax
+	adcl	$0,%ecx
+
+sqrt_stage_2_finish:
+	sarl	$1,%ecx		/* divide by 2 */
+	rcrl	$1,%eax
+
+	/* Form the new estimate in %esi:%edi */
+	movl	%eax,%edi
+	addl	%ecx,%esi
+
+	jnz	sqrt_stage_2_done	/* result should be [1..2) */
+
+#ifdef PARANOID
+/* It should be possible to get here only if the arg is ffff....ffff */
+	/* Explicit 'l' suffix: neither operand is a register, so the
+	   operand size cannot be deduced from the operands. */
+	cmpl	$0xffffffff,FPU_fsqrt_arg_1
+	jnz	sqrt_stage_2_error
+#endif /* PARANOID */
+
+/* The best rounded result. */
+	xorl	%eax,%eax
+	decl	%eax		/* %eax = 0xffffffff */
+	movl	%eax,%edi
+	movl	%eax,%esi
+	movl	$0x7fffffff,%eax	/* the bits below %esi:%edi */
+	jmp	sqrt_round_result
+
+#ifdef PARANOID
+sqrt_stage_2_error:
+	/* NOTE(review): the pushed word is not popped again and control
+	   falls through into the code below once EXCEPTION returns;
+	   confirm that this is what the exit path expects. */
+	pushl	EX_INTERNAL|0x213
+	call	EXCEPTION
+#endif /* PARANOID */
+
+sqrt_stage_2_done:
+
+/* Now the square root has been computed to better than 60 bits. */
+
+/* Find the square of the guess. */
+/*
+ * The guess is %esi:%edi.  Only the upper three words of its square are
+ * kept: the low word of %edi * %edi is dropped, and the cross product
+ * %edi * %esi is added in twice.
+ */
+	movl	%edi,%eax		/* ls word of guess */
+	mull	%edi
+	movl	%edx,FPU_accum_1
+
+	movl	%esi,%eax
+	mull	%esi
+	movl	%edx,FPU_accum_3
+	movl	%eax,FPU_accum_2
+
+	movl	%edi,%eax
+	mull	%esi
+	addl	%eax,FPU_accum_1
+	adcl	%edx,FPU_accum_2
+	adcl	$0,FPU_accum_3
+
+/* The second copy of the cross product is still in %edx:%eax, so the
+   multiply does not have to be repeated: */
+/*	movl	%esi,%eax */
+/*	mull	%edi */
+	addl	%eax,FPU_accum_1
+	adcl	%edx,FPU_accum_2
+	adcl	$0,FPU_accum_3
+
+/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
+
+	/* accum = guess^2 - n, over three words */
+	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
+	subl	%eax,FPU_accum_1
+	movl	FPU_fsqrt_arg_1,%eax
+	sbbl	%eax,FPU_accum_2
+	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
+	sbbl	%eax,FPU_accum_3
+	jnc	sqrt_stage_3_positive
+
+/* Subtraction gives a negative result,
+   negate the result before division */
+	notl	FPU_accum_1
+	notl	FPU_accum_2
+	notl	FPU_accum_3
+	addl	$1,FPU_accum_1
+	adcl	$0,FPU_accum_2
+
+#ifdef PARANOID
+	adcl	$0,FPU_accum_3	/* This must be zero */
+	jz	sqrt_stage_3_no_error
+
+sqrt_stage_3_error:
+	/* NOTE(review): the pushed word is not popped again and execution
+	   continues below once EXCEPTION returns; confirm that this is
+	   what the exit path expects. */
+	pushl	EX_INTERNAL|0x207
+	call	EXCEPTION
+
+sqrt_stage_3_no_error:
+#endif /* PARANOID */
+
+	/* Two word quotient of the difference by %esi, in %ecx:%eax */
+	movl	FPU_accum_2,%edx
+	movl	FPU_accum_1,%eax
+	divl	%esi
+	movl	%eax,%ecx
+
+	movl	%edx,%eax
+	divl	%esi
+
+	sarl	$1,%ecx		/* divide by 2 */
+	rcrl	$1,%eax
+
+	/* prepare to round the result */
+
+	/* The guess was too small: add the correction.  %eax is left
+	   holding the bits below %esi:%edi. */
+	addl	%ecx,%edi
+	adcl	$0,%esi
+
+	jmp	sqrt_stage_3_finished
+
+sqrt_stage_3_positive:
+	movl	FPU_accum_2,%edx
+	movl	FPU_accum_1,%eax
+	divl	%esi
+	movl	%eax,%ecx
+
+	movl	%edx,%eax
+	divl	%esi
+
+	sarl	$1,%ecx		/* divide by 2 */
+	rcrl	$1,%eax
+
+	/* prepare to round the result */
+
+	notl	%eax		/* Negate the correction term */
+	notl	%ecx
+	addl	$1,%eax
+	adcl	$0,%ecx		/* carry here ==> correction == 0 */
+	adcl	$0xffffffff,%esi	/* adds -1 plus that carry */
+
+	addl	%ecx,%edi
+	adcl	$0,%esi
+
+sqrt_stage_3_finished:
+
+/*
+ * The result in %esi:%edi:%eax should be good to about 90 bits here,
+ * and the rounding information here does not have sufficient accuracy
+ * in a few rare cases.
+ */
+/*
+ * %eax holds the bits below the 64 bit estimate.  When it lies within
+ * 0x20 of zero (from either side) or of 0x80000000 it is replaced by a
+ * value worked out in one of the special-case paths below; otherwise
+ * it is passed on to the rounding code unchanged.
+ */
+	cmpl	$0xffffffe0,%eax
+	ja	sqrt_near_exact_x	/* just below a 64 bit value */
+
+	cmpl	$0x00000020,%eax
+	jb	sqrt_near_exact		/* just above a 64 bit value */
+
+	cmpl	$0x7fffffe0,%eax
+	jb	sqrt_round_result	/* safely below the half-way point */
+
+	cmpl	$0x80000020,%eax
+	jb	sqrt_get_more_precision	/* too close to the half-way point */
+
+sqrt_round_result:
+/* Set up for rounding operations */
+/* Hand over: %eax:%ebx = result, %edx = bits below it, %edi = n,
+   %ecx = control word. */
+	movl	%eax,%edx
+	movl	%esi,%eax
+	movl	%edi,%ebx
+	movl	PARAM1,%edi
+	movl	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
+	movl	PARAM2,%ecx
+	jmp	fpu_reg_round_sqrt
+
+
+sqrt_near_exact_x:
+/* First, the estimate must be rounded up. */
+	addl	$1,%edi
+	adcl	$0,%esi
+
+sqrt_near_exact:
+/*
+ * This is an easy case because x^1/2 is monotonic.
+ * We need just find the square of our estimate, compare it
+ * with the argument, and deduce whether our estimate is
+ * above, below, or exact. We use the fact that the estimate
+ * is known to be accurate to about 90 bits.
+ */
+/*
+ * Only the two lowest words of the square are formed, in %ebx:%ecx.
+ * On exit %eax holds replacement rounding bits for sqrt_round_result:
+ * 0 (exact), 0x000000ff (estimate too small) or 0xffffff00 (estimate
+ * too large, after it has been decremented).
+ */
+	movl	%edi,%eax		/* ls word of guess */
+	mull	%edi
+	movl	%edx,%ebx		/* 2nd ls word of square */
+	movl	%eax,%ecx		/* ls word of square */
+
+	movl	%edi,%eax
+	mull	%esi
+	addl	%eax,%ebx		/* the cross product counts twice */
+	addl	%eax,%ebx
+
+#ifdef PARANOID
+	/* NOTE(review): as written this check can never fire: any %ebx
+	   that fails the first test (>= 0xffffffb0) passes the second
+	   (> 0x50).  Left unchanged; confirm the intended range. */
+	cmp	$0xffffffb0,%ebx
+	jb	sqrt_near_exact_ok
+
+	cmp	$0x00000050,%ebx
+	ja	sqrt_near_exact_ok
+
+	pushl	EX_INTERNAL|0x214
+	call	EXCEPTION
+
+sqrt_near_exact_ok:
+#endif /* PARANOID */
+
+	or	%ebx,%ebx
+	js	sqrt_near_exact_small
+
+	jnz	sqrt_near_exact_large
+
+	/* %ebx is zero here, so this tests the ls word of the square,
+	   which was saved in %ecx.  (%edx holds the high half of the
+	   cross product by now and says nothing about exactness.) */
+	or	%ebx,%ecx
+	jnz	sqrt_near_exact_large
+
+/* Our estimate is exactly the right answer */
+	xorl	%eax,%eax
+	jmp	sqrt_round_result
+
+sqrt_near_exact_small:
+/* Our estimate is too small */
+	movl	$0x000000ff,%eax
+	jmp	sqrt_round_result
+
+sqrt_near_exact_large:
+/* Our estimate is too large, we need to decrement it */
+	subl	$1,%edi
+	sbbl	$0,%esi
+	movl	$0xffffff00,%eax
+	jmp	sqrt_round_result
+
+
+sqrt_get_more_precision:
+/* This case is almost the same as the above, except we start
+   with an extra bit of precision in the estimate. */
+/*
+ * On exit %eax holds replacement rounding bits for sqrt_round_result:
+ * 0x80000000 (exactly half way), 0x800000ff (estimate too small) or
+ * 0x7fffff00 (estimate too large).  %esi:%edi itself is not changed.
+ */
+	stc			/* The extra bit. */
+	rcll	$1,%edi		/* Shift the estimate left one bit */
+	rcll	$1,%esi		/* (the ms bit of %esi is shifted out) */
+
+	movl	%edi,%eax		/* ls word of guess */
+	mull	%edi
+	movl	%edx,%ebx		/* 2nd ls word of square */
+	movl	%eax,%ecx		/* ls word of square */
+
+	movl	%edi,%eax
+	mull	%esi
+	addl	%eax,%ebx		/* the cross product counts twice */
+	addl	%eax,%ebx
+
+/* Put our estimate back to its original value */
+	stc			/* The ms bit. */
+	rcrl	$1,%esi		/* Shift the estimate right one bit */
+	rcrl	$1,%edi
+
+#ifdef PARANOID
+	/* NOTE(review): as written this check can never fire: any %ebx
+	   that fails the first test (>= 0xffffff60) passes the second
+	   (> 0xa0).  Left unchanged; confirm the intended range. */
+	cmp	$0xffffff60,%ebx
+	jb	sqrt_more_prec_ok
+
+	cmp	$0x000000a0,%ebx
+	ja	sqrt_more_prec_ok
+
+	pushl	EX_INTERNAL|0x215
+	call	EXCEPTION
+
+sqrt_more_prec_ok:
+#endif /* PARANOID */
+
+	or	%ebx,%ebx
+	js	sqrt_more_prec_small
+
+	jnz	sqrt_more_prec_large
+
+	/* %ebx is zero here, so this tests the ls word of the square */
+	or	%ebx,%ecx
+	jnz	sqrt_more_prec_large
+
+/* Our estimate is exactly the right answer */
+	movl	$0x80000000,%eax
+	jmp	sqrt_round_result
+
+sqrt_more_prec_small:
+/* Our estimate is too small */
+	movl	$0x800000ff,%eax
+	jmp	sqrt_round_result
+
+sqrt_more_prec_large:
+/* Our estimate is too large */
+	movl	$0x7fffff00,%eax
+	jmp	sqrt_round_result
author	Ralf Baechle <ralf@linux-mips.org>	1995-11-14 08:00:00 +0000
committer	<ralf@linux-mips.org>	1995-11-14 08:00:00 +0000
commit	e7c2a72e2680827d6a733931273a93461c0d8d1b (patch)
tree	c9abeda78ef7504062bb2e816bcf3e3c9d680112 /arch/i386/math-emu
parent	ec6044459060a8c9ce7f64405c465d141898548c (diff)