diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1995-11-14 08:00:00 +0000 |
---|---|---|
committer | <ralf@linux-mips.org> | 1995-11-14 08:00:00 +0000 |
commit | e7c2a72e2680827d6a733931273a93461c0d8d1b (patch) | |
tree | c9abeda78ef7504062bb2e816bcf3e3c9d680112 /arch/i386/math-emu | |
parent | ec6044459060a8c9ce7f64405c465d141898548c (diff) |
Import of Linux/MIPS 1.3.0
Diffstat (limited to 'arch/i386/math-emu')
45 files changed, 12946 insertions, 0 deletions
diff --git a/arch/i386/math-emu/Makefile b/arch/i386/math-emu/Makefile new file mode 100644 index 000000000..2d391a9e6 --- /dev/null +++ b/arch/i386/math-emu/Makefile @@ -0,0 +1,52 @@ +# +# Makefile for wm-FPU-emu +# + +#DEBUG = -DDEBUGGING +DEBUG = +PARANOID = -DPARANOID +CFLAGS := $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin + +.c.o: + $(CC) $(CFLAGS) $(MATH_EMULATION) -c $< + +.S.o: + $(CC) -D__ASSEMBLER__ $(PARANOID) -c $< + +.s.o: + $(CC) -c $< + +OBJS = fpu_entry.o div_small.o errors.o \ + fpu_arith.o fpu_aux.o fpu_etc.o fpu_trig.o \ + load_store.o get_address.o \ + poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \ + reg_add_sub.o reg_compare.o reg_constant.o reg_ld_str.o \ + reg_div.o reg_mul.o reg_norm.o \ + reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \ + reg_round.o \ + wm_shrx.o wm_sqrt.o \ + div_Xsig.o polynom_Xsig.o round_Xsig.o \ + shr_Xsig.o mul_Xsig.o + +math.a: $(OBJS) + rm -f math.a + $(AR) rcs math.a $(OBJS) + sync + +dep: + $(CPP) -M *.c > .depend + $(CPP) -D__ASSEMBLER__ -M *.S >> .depend + +proto: + cproto -e -DMAKING_PROTO *.c >fpu_proto.h + +modules: + +dummy: + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif diff --git a/arch/i386/math-emu/README b/arch/i386/math-emu/README new file mode 100644 index 000000000..2c0acb423 --- /dev/null +++ b/arch/i386/math-emu/README @@ -0,0 +1,436 @@ + +---------------------------------------------------------------------------+ + | wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | This program is free software; you can redistribute it and/or modify | + | it under the terms of the GNU General Public License version 2 as | + | published by the Free Software Foundation. | + | | + | This program is distributed in the hope that it will be useful, | + | but WITHOUT ANY WARRANTY; without even the implied warranty of | + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | + | GNU General Public License for more details. | + | | + | You should have received a copy of the GNU General Public License | + | along with this program; if not, write to the Free Software | + | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | + | | + +---------------------------------------------------------------------------+ + + + +wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387 +which was my 80387 emulator for early versions of djgpp (gcc under +msdos); wm-emu387 was in turn based upon emu387 which was written by +DJ Delorie for djgpp. The interface to the Linux kernel is based upon +the original Linux math emulator by Linus Torvalds. + +My target FPU for wm-FPU-emu is that described in the Intel486 +Programmer's Reference Manual (1992 edition). Unfortunately, numerous +facets of the functioning of the FPU are not well covered in the +Reference Manual. The information in the manual has been supplemented +with measurements on real 80486's. Unfortunately, it is simply not +possible to be sure that all of the peculiarities of the 80486 have +been discovered, so there is always likely to be obscure differences +in the detailed behaviour of the emulator and a real 80486. + +wm-FPU-emu does not implement all of the behaviour of the 80486 FPU, +but is very close. See "Limitations" later in this file for a list of +some differences. + +Please report bugs, etc to me at: + billm@vaxc.cc.monash.edu.au + or at: + billm@jacobi.maths.monash.edu.au + + +--Bill Metzenthen + August 1994 + + +----------------------- Internals of wm-FPU-emu ----------------------- + +Numeric algorithms: +(1) Add, subtract, and multiply. Nothing remarkable in these. +(2) Divide has been tuned to get reasonable performance. The algorithm + is not the obvious one which most people seem to use, but is designed + to take advantage of the characteristics of the 80386. I expect that + it has been invented many times before I discovered it, but I have not + seen it. It is based upon one of those ideas which one carries around + for years without ever bothering to check it out. +(3) The sqrt function has been tuned to get good performance. It is based + upon Newton's classic method. Performance was improved by capitalizing + upon the properties of Newton's method, and the code is once again + structured taking account of the 80386 characteristics. +(4) The trig, log, and exp functions are based in each case upon quasi- + "optimal" polynomial approximations. My definition of "optimal" was + based upon getting good accuracy with reasonable speed. +(5) The argument reducing code for the trig function effectively uses + a value of pi which is accurate to more than 128 bits. As a consequence, + the reduced argument is accurate to more than 64 bits for arguments up + to a few pi, and accurate to more than 64 bits for most arguments, + even for arguments approaching 2^63. This is far superior to an + 80486, which uses a value of pi which is accurate to 66 bits. + +The code of the emulator is complicated slightly by the need to +account for a limited form of re-entrancy. Normally, the emulator will +emulate each FPU instruction to completion without interruption. +However, it may happen that when the emulator is accessing the user +memory space, swapping may be needed. In this case the emulator may be +temporarily suspended while disk i/o takes place. During this time +another process may use the emulator, thereby perhaps changing static +variables. The code which accesses user memory is confined to five +files: + fpu_entry.c + reg_ld_str.c + load_store.c + get_address.c + errors.c +As from version 1.12 of the emulator, no static variables are used +(apart from those in the kernel's per-process tables). The emulator is +therefore now fully re-entrant, rather than having just the restricted +form of re-entrancy which is required by the Linux kernel. + +----------------------- Limitations of wm-FPU-emu ----------------------- + +There are a number of differences between the current wm-FPU-emu +(version 1.20) and the 80486 FPU (apart from bugs). Some of the more +important differences are listed below: + +The Roundup flag does not have much meaning for the transcendental +functions and its 80486 value with these functions is likely to differ +from its emulator value. + +In a few rare cases the Underflow flag obtained with the emulator will +be different from that obtained with an 80486. This occurs when the +following conditions apply simultaneously: +(a) the operands have a higher precision than the current setting of the + precision control (PC) flags. +(b) the underflow exception is masked. +(c) the magnitude of the exact result (before rounding) is less than 2^-16382. +(d) the magnitude of the final result (after rounding) is exactly 2^-16382. +(e) the magnitude of the exact result would be exactly 2^-16382 if the + operands were rounded to the current precision before the arithmetic + operation was performed. +If all of these apply, the emulator will set the Underflow flag but a real +80486 will not. + +NOTE: Certain formats of Extended Real are UNSUPPORTED. They are +unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities, +and Unnormals. None of these will be generated by an 80486 or by the +emulator. Do not use them. The emulator treats them differently in +detail from the way an 80486 does. + +The emulator treats PseudoDenormals differently from an 80486. These +numbers are in fact properly normalised numbers with the exponent +offset by 1, and the emulator treats them as such. Unlike the 80486, +the emulator does not generate a Denormal Operand exception for these +numbers. The arithmetical results produced when using such a number as +an operand are the same for the emulator and a real 80486 (apart from +any slight precision difference for the transcendental functions). +Neither the emulator nor an 80486 produces one of these numbers as the +result of any arithmetic operation. An 80486 can keep one of these +numbers in an FPU register with its identity as a PseudoDenormal, but +the emulator will not; they are always converted to a valid number. + +Self modifying code can cause the emulator to fail. An example of such +code is: + movl %esp,[%ebx] + fld1 +The FPU instruction may be (usually will be) loaded into the pre-fetch +queue of the cpu before the mov instruction is executed. If the +destination of the 'movl' overlaps the FPU instruction then the bytes +in the prefetch queue and memory will be inconsistent when the FPU +instruction is executed. The emulator will be invoked but will not be +able to find the instruction which caused the device-not-present +exception. For this case, the emulator cannot emulate the behaviour of +an 80486DX. + +Handling of the address size override prefix byte (0x67) has not been +extensively tested yet. A major problem exists because using it in +vm86 mode can cause a general protection fault. Address offsets +greater than 0xffff appear to be illegal in vm86 mode but are quite +acceptable (and work) in real mode. A small test program developed to +check the addressing, and which runs successfully in real mode, +crashes dosemu under Linux and also brings Windows down with a general +protection fault message when run under the MS-DOS prompt of Windows +3.1. (The program simply reads data from a valid address). + +The emulator supports 16-bit protected mode, with one difference from +an 80486DX. A 80486DX will allow some floating point instructions to +write a few bytes below the lowest address of the stack. The emulator +will not allow this in 16-bit protected mode: no instructions are +allowed to write outside the bounds set by the protection. + +----------------------- Performance of wm-FPU-emu ----------------------- + +Speed. +----- + +The speed of floating point computation with the emulator will depend +upon instruction mix. Relative performance is best for the instructions +which require most computation. The simple instructions are adversely +affected by the fpu instruction trap overhead. + + +Timing: Some simple timing tests have been made on the emulator functions. +The times include load/store instructions. All times are in microseconds +measured on a 33MHz 386 with 64k cache. The Turbo C tests were under +ms-dos, the next two columns are for emulators running with the djgpp +ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97, +using libm4.0 (hard). + +function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu + + + 60.5 154.8 76.5 139.4 + - 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7 + * 71.0 190.8 79.6 146.6 + / 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1 + + sin() 310.8 4692.0 319.0 398.5 + cos() 284.4 4855.2 308.0 388.7 + tan() 495.0 8807.1 394.9 504.7 + atan() 328.9 4866.4 601.1 419.5-491.9 + + sqrt() 128.7 crashed 145.2 227.0 + log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1 + exp() 479.1 6619.2 469.1 850.8 + + +The performance under Linux is improved by the use of look-ahead code. +The following results show the improvement which is obtained under +Linux due to the look-ahead code. Also given are the times for the +original Linux emulator with the 4.1 'soft' lib. + + [ Linus' note: I changed look-ahead to be the default under linux, as + there was no reason not to use it after I had edited it to be + disabled during tracing ] + + wm-FPU-emu w original w + look-ahead 'soft' lib + + 106.4 190.2 + - 108.6-111.6 192.4-216.2 + * 113.4 193.1 + / 108.8-124.4 700.1-706.2 + + sin() 390.5 2642.0 + cos() 381.5 2767.4 + tan() 496.5 3153.3 + atan() 367.2-435.5 2439.4-3396.8 + + sqrt() 195.1 4732.5 + log() 358.0-387.5 3359.2-3390.3 + exp() 619.3 4046.4 + + +These figures are now somewhat out-of-date. The emulator has become +progressively slower for most functions as more of the 80486 features +have been implemented. + + +----------------------- Accuracy of wm-FPU-emu ----------------------- + + +The accuracy of the emulator is in almost all cases equal to or better +than that of an Intel 80486 FPU. + +The results of the basic arithmetic functions (+,-,*,/), and fsqrt +match those of an 80486 FPU. They are the best possible; the error for +these never exceeds 1/2 an lsb. The fprem and fprem1 instructions +return exact results; they have no error. + + +The following table compares the emulator accuracy for the sqrt(), +trig and log functions against the Turbo C "emulator". For this table, +each function was tested at about 400 points. Ideal worst-case results +would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for +arguments greater than pi/4 can be thought of as being related to the +precision of the argument x; e.g. an argument of pi/2-(1e-10) which is +accurate to 64 bits can result in a relative accuracy in cos() of +about 64 + log2(cos(x)) = 31 bits. + + +Function Tested x range Worst result Turbo C + (relative bits) + +sqrt(x) 1 .. 2 64.1 63.2 +atan(x) 1e-10 .. 200 64.2 62.8 +cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4 + 64.1 (x = pi/2-(1e-10)) 31.9 +sin(x) 1e-10 .. pi/2 64.0 62.8 +tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1 + 64.1 (x = pi/2-(1e-10)) 31.9 +exp(x) 0 .. 1 63.1 ** 62.9 +log(x) 1+1e-6 .. 2 63.8 ** 62.1 + +** The accuracy for exp() and log() is low because the FPU (emulator) +does not compute them directly; two operations are required. + + +The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or +later) for 'float' variables (24 bit precision numbers) when precision +control is set to 24, 53 or 64 bits, and for 'double' variables (53 +bit precision numbers) when precision control is set to 53 bits (a +properly performing FPU cannot pass the 'paranoia' tests for 'double' +variables when precision control is set to 64 bits). + +The code for reducing the argument for the trig functions (fsin, fcos, +fptan and fsincos) has been improved and now effectively uses a value +for pi which is accurate to more than 128 bits precision. As a +consequence, the accuracy of these functions for large arguments has +been dramatically improved (and is now very much better than an 80486 +FPU). There is also now no degradation of accuracy for fcos and fptan +for operands close to pi/2. Measured results are (note that the +definition of accuracy has changed slightly from that used for the +above table): + +Function Tested x range Worst result + (absolute bits) + +cos(x) 0 .. 9.22e+18 62.0 +sin(x) 1e-16 .. 9.22e+18 62.1 +tan(x) 1e-16 .. 9.22e+18 61.8 + +It is possible with some effort to find very large arguments which +give much degraded precision. For example, the integer number + 8227740058411162616.0 +is within about 10e-7 of a multiple of pi. To find the tan (for +example) of this number to 64 bits precision it would be necessary to +have a value of pi which had about 150 bits precision. The FPU +emulator computes the result to about 42.6 bits precision (the correct +result is about -9.739715e-8). On the other hand, an 80486 FPU returns +0.01059, which in relative terms is hopelessly inaccurate. + +For arguments close to critical angles (which occur at multiples of +pi/2) the emulator is more accurate than an 80486 FPU. For very large +arguments, the emulator is far more accurate. + + +Prior to version 1.20 of the emulator, the accuracy of the results for +the transcendental functions (in their principal range) was not as +good as the results from an 80486 FPU. From version 1.20, the accuracy +has been considerably improved and these functions now give measured +worst-case results which are better than the worst-case results given +by an 80486 FPU. + +The following table gives the measured results for the emulator. The +number of randomly selected arguments in each case is about half a +million. The group of three columns gives the frequency of the given +accuracy in number of times per million, thus the second of these +columns shows that an accuracy of between 63.80 and 63.89 bits was +found at a rate of 133 times per one million measurements for fsin. +The results show that the fsin, fcos and fptan instructions return +results which are in error (i.e. less accurate than the best possible +result (which is 64 bits)) for about one per cent of all arguments +between -pi/2 and +pi/2. The other instructions have a lower +frequency of results which are in error. The last two columns give +the worst accuracy which was found (in bits) and the approximate value +of the argument which produced it. + + frequency (per M) + ------------------- --------------- +instr arg range # tests 63.7 63.8 63.9 worst at arg + bits bits bits bits +----- ------------ ------- ---- ---- ----- ----- -------- +fsin (0,pi/2) 547756 0 133 10673 63.89 0.451317 +fcos (0,pi/2) 547563 0 126 10532 63.85 0.700801 +fptan (0,pi/2) 536274 11 267 10059 63.74 0.784876 +fpatan 4 quadrants 517087 0 8 1855 63.88 0.435121 (4q) +fyl2x (0,20) 541861 0 0 1323 63.94 1.40923 (x) +fyl2xp1 (-.293,.414) 520256 0 0 5678 63.93 0.408542 (x) +f2xm1 (-1,1) 538847 4 481 6488 63.79 0.167709 + + +Tests performed on an 80486 FPU showed results of lower accuracy. The +following table gives the results which were obtained with an AMD +486DX2/66 (other tests indicate that an Intel 486DX produces +identical results). The tests were basically the same as those used +to measure the emulator (the values, being random, were in general not +the same). The total number of tests for each instruction are given +at the end of the table, in case each about 100k tests were performed. +Another line of figures at the end of the table shows that most of the +instructions return results which are in error for more than 10 +percent of the arguments tested. + +The numbers in the body of the table give the approx number of times a +result of the given accuracy in bits (given in the left-most column) +was obtained per one million arguments. For three of the instructions, +two columns of results are given: * The second column for f2xm1 gives +the number cases where the results of the first column were for a +positive argument, this shows that this instruction gives better +results for positive arguments than it does for negative. * In the +cases of fcos and fptan, the first column gives the results when all +cases where arguments greater than 1.5 were removed from the results +given in the second column. Unlike the emulator, an 80486 FPU returns +results of relatively poor accuracy for these instructions when the +argument approaches pi/2. The table does not show those cases when the +accuracy of the results were less than 62 bits, which occurs quite +often for fsin and fptan when the argument approaches pi/2. This poor +accuracy is discussed above in relation to the Turbo C "emulator", and +the accuracy of the value of pi. + + +bits f2xm1 f2xm1 fpatan fcos fcos fyl2x fyl2xp1 fsin fptan fptan +62.0 0 0 0 0 437 0 0 0 0 925 +62.1 0 0 10 0 894 0 0 0 0 1023 +62.2 14 0 0 0 1033 0 0 0 0 945 +62.3 57 0 0 0 1202 0 0 0 0 1023 +62.4 385 0 0 10 1292 0 23 0 0 1178 +62.5 1140 0 0 119 1649 0 39 0 0 1149 +62.6 2037 0 0 189 1620 0 16 0 0 1169 +62.7 5086 14 0 646 2315 10 101 35 39 1402 +62.8 8818 86 0 984 3050 59 287 131 224 2036 +62.9 11340 1355 0 2126 4153 79 605 357 321 1948 +63.0 15557 4750 0 3319 5376 246 1281 862 808 2688 +63.1 20016 8288 0 4620 6628 511 2569 1723 1510 3302 +63.2 24945 11127 10 6588 8098 1120 4470 2968 2990 4724 +63.3 25686 12382 69 8774 10682 1906 6775 4482 5474 7236 +63.4 29219 14722 79 11109 12311 3094 9414 7259 8912 10587 +63.5 30458 14936 393 13802 15014 5874 12666 9609 13762 15262 +63.6 32439 16448 1277 17945 19028 10226 15537 14657 19158 20346 +63.7 35031 16805 4067 23003 23947 18910 20116 21333 25001 26209 +63.8 33251 15820 7673 24781 25675 24617 25354 24440 29433 30329 +63.9 33293 16833 18529 28318 29233 31267 31470 27748 29676 30601 + +Per cent with error: + 30.9 3.2 18.5 9.8 13.1 11.6 17.4 +Total arguments tested: + 70194 70099 101784 100641 100641 101799 128853 114893 102675 102675 + + +------------------------- Contributors ------------------------------- + +A number of people have contributed to the development of the +emulator, often by just reporting bugs, sometimes with suggested +fixes, and a few kind people have provided me with access in one way +or another to an 80486 machine. Contributors include (to those people +who I may have forgotten, please forgive me): + +Linus Torvalds +Tommy.Thorn@daimi.aau.dk +Andrew.Tridgell@anu.edu.au +Nick Holloway, alfie@dcs.warwick.ac.uk +Hermano Moura, moura@dcs.gla.ac.uk +Jon Jagger, J.Jagger@scp.ac.uk +Lennart Benschop +Brian Gallew, geek+@CMU.EDU +Thomas Staniszewski, ts3v+@andrew.cmu.edu +Martin Howell, mph@plasma.apana.org.au +M Saggaf, alsaggaf@athena.mit.edu +Peter Barker, PETER@socpsy.sci.fau.edu +tom@vlsivie.tuwien.ac.at +Dan Russel, russed@rpi.edu +Daniel Carosone, danielce@ee.mu.oz.au +cae@jpmorgan.com +Hamish Coleman, t933093@minyos.xx.rmit.oz.au +Bruce Evans, bde@kralizec.zeta.org.au +Timo Korvola, Timo.Korvola@hut.fi +Rick Lyons, rick@razorback.brisnet.org.au +Rick, jrs@world.std.com + +...and numerous others who responded to my request for help with +a real 80486. + diff --git a/arch/i386/math-emu/control_w.h b/arch/i386/math-emu/control_w.h new file mode 100644 index 000000000..ef5fced39 --- /dev/null +++ b/arch/i386/math-emu/control_w.h @@ -0,0 +1,45 @@ +/*---------------------------------------------------------------------------+ + | control_w.h | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _CONTROLW_H_ +#define _CONTROLW_H_ + +#ifdef __ASSEMBLER__ +#define _Const_(x) $##x +#else +#define _Const_(x) x +#endif + +#define CW_RC _Const_(0x0C00) /* rounding control */ +#define CW_PC _Const_(0x0300) /* precision control */ + +#define CW_Precision Const_(0x0020) /* loss of precision mask */ +#define CW_Underflow Const_(0x0010) /* underflow mask */ +#define CW_Overflow Const_(0x0008) /* overflow mask */ +#define CW_ZeroDiv Const_(0x0004) /* divide by zero mask */ +#define CW_Denormal Const_(0x0002) /* denormalized operand mask */ +#define CW_Invalid Const_(0x0001) /* invalid operation mask */ + +#define CW_Exceptions _Const_(0x003f) /* all masks */ + +#define RC_RND _Const_(0x0000) +#define RC_DOWN _Const_(0x0400) +#define RC_UP _Const_(0x0800) +#define RC_CHOP _Const_(0x0C00) + +/* p 15-5: Precision control bits affect only the following: + ADD, SUB(R), MUL, DIV(R), and SQRT */ +#define PR_24_BITS _Const_(0x000) +#define PR_53_BITS _Const_(0x200) +#define PR_64_BITS _Const_(0x300) +#define PR_RESERVED_BITS _Const_(0x100) +/* FULL_PRECISION simulates all exceptions masked */ +#define FULL_PRECISION (PR_64_BITS | RC_RND | 0x3f) + +#endif _CONTROLW_H_ diff --git a/arch/i386/math-emu/div_Xsig.S b/arch/i386/math-emu/div_Xsig.S new file mode 100644 index 000000000..67d8be964 --- /dev/null +++ b/arch/i386/math-emu/div_Xsig.S @@ -0,0 +1,369 @@ + .file "div_Xsig.S" +/*---------------------------------------------------------------------------+ + | div_Xsig.S | + | | + | Division subroutine for 96 bit quantities | + | | + | Copyright (C) 1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and | + | put the 96 bit result at the location d. | + | | + | The result may not be accurate to 96 bits. It is intended for use where | + | a result better than 64 bits is required. The result should usually be | + | good to at least 94 bits. | + | The returned result is actually divided by one half. This is done to | + | prevent overflow. | + | | + | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd | + | | + | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_asm.h" + + +#define XsigLL(x) (x) +#define XsigL(x) 4(x) +#define XsigH(x) 8(x) + + +#ifndef NON_REENTRANT_FPU +/* + Local storage on the stack: + Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + */ +#define FPU_accum_3 -4(%ebp) +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) +#define FPU_result_3 -20(%ebp) +#define FPU_result_2 -24(%ebp) +#define FPU_result_1 -28(%ebp) + +#else +.data +/* + Local storage in a static area: + Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + */ + .align 2,0 +FPU_accum_3: + .long 0 +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 +FPU_result_3: + .long 0 +FPU_result_2: + .long 0 +FPU_result_1: + .long 0 +#endif NON_REENTRANT_FPU + + +.text + .align 2,144 + +.globl _div_Xsig + +_div_Xsig: + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif NON_REENTRANT_FPU + + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* pointer to num */ + movl PARAM2,%ebx /* pointer to denom */ + +#ifdef PARANOID + testl $0x80000000, XsigH(%ebx) /* Divisor */ + je L_bugged +#endif PARANOID + + +/*---------------------------------------------------------------------------+ + | Divide: Return arg1/arg2 to arg3. | + | | + | The maximum returned value is (ignoring exponents) | + | .ffffffff ffffffff | + | ------------------ = 1.ffffffff fffffffe | + | .80000000 00000000 | + | and the minimum is | + | .80000000 00000000 | + | ------------------ = .80000000 00000001 (rounded) | + | .ffffffff ffffffff | + | | + +---------------------------------------------------------------------------*/ + + /* Save extended dividend in local register */ + + /* Divide by 2 to prevent overflow */ + clc + movl XsigH(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_3 + movl XsigL(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_2 + movl XsigLL(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_1 + movl $0,%eax + rcrl %eax + movl %eax,FPU_accum_0 + + movl FPU_accum_2,%eax /* Get the current num */ + movl FPU_accum_3,%edx + +/*----------------------------------------------------------------------*/ +/* Initialization done. + Do the first 32 bits. */ + + /* We will divide by a number which is too large */ + movl XsigH(%ebx),%ecx + addl $1,%ecx + jnc LFirst_div_not_1 + + /* here we need to divide by 100000000h, + i.e., no division at all.. */ + mov %edx,%eax + jmp LFirst_div_done + +LFirst_div_not_1: + divl %ecx /* Divide the numerator by the augmented + denom ms dw */ + +LFirst_div_done: + movl %eax,FPU_result_3 /* Put the result in the answer */ + + mull XsigH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_2 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_3 + + movl FPU_result_3,%eax /* Get the result back */ + mull XsigL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + sbbl $0,FPU_accum_3 + je LDo_2nd_32_bits /* Must check for non-zero result here */ + +#ifdef PARANOID + jb L_bugged_1 +#endif PARANOID + + /* need to subtract another once of the denom */ + incl FPU_result_3 /* Correct the answer */ + + movl XsigL(%ebx),%eax + movl XsigH(%ebx),%edx + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + sbbl $0,FPU_accum_3 + jne L_bugged_1 /* Must check for non-zero result here */ +#endif PARANOID + +/*----------------------------------------------------------------------*/ +/* Half of the main problem is done, there is just a reduced numerator + to handle now. + Work with the second 32 bits, FPU_accum_0 not used from now on */ +LDo_2nd_32_bits: + movl FPU_accum_2,%edx /* get the reduced num */ + movl FPU_accum_1,%eax + + /* need to check for possible subsequent overflow */ + cmpl XsigH(%ebx),%edx + jb LDo_2nd_div + ja LPrevent_2nd_overflow + + cmpl XsigL(%ebx),%eax + jb LDo_2nd_div + +LPrevent_2nd_overflow: +/* The numerator is greater or equal, would cause overflow */ + /* prevent overflow */ + subl XsigL(%ebx),%eax + sbbl XsigH(%ebx),%edx + movl %edx,FPU_accum_2 + movl %eax,FPU_accum_1 + + incl FPU_result_3 /* Reflect the subtraction in the answer */ + +#ifdef PARANOID + je L_bugged_2 /* Can't bump the result to 1.0 */ +#endif PARANOID + +LDo_2nd_div: + cmpl $0,%ecx /* augmented denom msw */ + jnz LSecond_div_not_1 + + /* %ecx == 0, we are dividing by 1.0 */ + mov %edx,%eax + jmp LSecond_div_done + +LSecond_div_not_1: + divl %ecx /* Divide the numerator by the denom ms dw */ + +LSecond_div_done: + movl %eax,FPU_result_2 /* Put the result in the answer */ + + mull XsigH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif PARANOID + + movl FPU_result_2,%eax /* Get the result back */ + mull XsigL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif PARANOID + + jz LDo_3rd_32_bits + +#ifdef PARANOID + cmpl $1,FPU_accum_2 + jne L_bugged_2 +#endif PARANOID + + /* need to subtract another once of the denom */ + movl XsigL(%ebx),%eax + movl XsigH(%ebx),%edx + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 + jne L_bugged_2 +#endif PARANOID + + addl $1,FPU_result_2 /* Correct the answer */ + adcl $0,FPU_result_3 + +#ifdef PARANOID + jc L_bugged_2 /* Must check for non-zero result here */ +#endif PARANOID + +/*----------------------------------------------------------------------*/ +/* The division is essentially finished here, we just need to perform + tidying operations. + Deal with the 3rd 32 bits */ +LDo_3rd_32_bits: + /* We use an approximation for the third 32 bits. + To take account of the 3rd 32 bits of the divisor + (call them del), we subtract del * (a/b) */ + + movl FPU_result_3,%eax /* a/b */ + mull XsigLL(%ebx) /* del */ + + subl %edx,FPU_accum_1 + + /* A borrow indicates that the result is negative */ + jnb LTest_over + + movl XsigH(%ebx),%edx + addl %edx,FPU_accum_1 + + subl $1,FPU_result_2 /* Adjust the answer */ + sbbl $0,FPU_result_3 + + /* The above addition might not have been enough, check again. */ + movl FPU_accum_1,%edx /* get the reduced num */ + cmpl XsigH(%ebx),%edx /* denom */ + jb LDo_3rd_div + + movl XsigH(%ebx),%edx + addl %edx,FPU_accum_1 + + subl $1,FPU_result_2 /* Adjust the answer */ + sbbl $0,FPU_result_3 + jmp LDo_3rd_div + +LTest_over: + movl FPU_accum_1,%edx /* get the reduced num */ + + /* need to check for possible subsequent overflow */ + cmpl XsigH(%ebx),%edx /* denom */ + jb LDo_3rd_div + + /* prevent overflow */ + subl XsigH(%ebx),%edx + movl %edx,FPU_accum_1 + + addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ + adcl $0,FPU_result_3 + +LDo_3rd_div: + movl FPU_accum_0,%eax + movl FPU_accum_1,%edx + divl XsigH(%ebx) + + movl %eax,FPU_result_1 /* Rough estimate of third word */ + + movl PARAM3,%esi /* pointer to answer */ + + movl FPU_result_1,%eax + movl %eax,XsigLL(%esi) + movl FPU_result_2,%eax + movl %eax,XsigL(%esi) + movl FPU_result_3,%eax + movl %eax,XsigH(%esi) + +L_exit: + popl %ebx + popl %edi + popl %esi + + leave + ret + + +#ifdef PARANOID +/* The logic is wrong if we got here */ +L_bugged: + pushl EX_INTERNAL|0x240 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_1: + pushl EX_INTERNAL|0x241 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_2: + pushl EX_INTERNAL|0x242 + call EXCEPTION + pop %ebx + jmp L_exit +#endif PARANOID diff --git a/arch/i386/math-emu/div_small.S b/arch/i386/math-emu/div_small.S new file mode 100644 index 000000000..0225a96d4 --- /dev/null +++ b/arch/i386/math-emu/div_small.S @@ -0,0 +1,50 @@ + .file "div_small.S" +/*---------------------------------------------------------------------------+ + | div_small.S | + | | + | Divide a 64 bit integer by a 32 bit integer & return remainder. | + | | + | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | unsigned long div_small(unsigned long long *x, unsigned long y) | + +---------------------------------------------------------------------------*/ + +#include "fpu_asm.h" + +.text + .align 2,144 + +.globl _div_small + +_div_small: + pushl %ebp + movl %esp,%ebp + + pushl %esi + + movl PARAM1,%esi /* pointer to num */ + movl PARAM2,%ecx /* The denominator */ + + movl 4(%esi),%eax /* Get the current num msw */ + xorl %edx,%edx + divl %ecx + + movl %eax,4(%esi) + + movl (%esi),%eax /* Get the num lsw */ + divl %ecx + + movl %eax,(%esi) + + movl %edx,%eax /* Return the remainder in eax */ + + popl %esi + + leave + ret + diff --git a/arch/i386/math-emu/errors.c b/arch/i386/math-emu/errors.c new file mode 100644 index 000000000..e34eec942 --- /dev/null +++ b/arch/i386/math-emu/errors.c @@ -0,0 +1,671 @@ +/*---------------------------------------------------------------------------+ + | errors.c | + | | + | The error handling functions for wm-FPU-emu | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include <linux/signal.h> + +#include <asm/segment.h> + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" +#include "reg_constant.h" +#include "version.h" + +/* */ +#undef PRINT_MESSAGES +/* */ + + +void Un_impl(void) +{ + unsigned char byte1, FPU_modrm; + unsigned long address = FPU_ORIG_EIP; + + RE_ENTRANT_CHECK_OFF; + /* No need to verify_area(), we have previously fetched these bytes. */ + printk("Unimplemented FPU Opcode at eip=%p : ", (void *) address); + if ( FPU_CS == USER_CS ) + { + while ( 1 ) + { + byte1 = get_fs_byte((unsigned char *) address); + if ( (byte1 & 0xf8) == 0xd8 ) break; + printk("[%02x]", byte1); + address++; + } + printk("%02x ", byte1); + FPU_modrm = get_fs_byte(1 + (unsigned char *) address); + + if (FPU_modrm >= 0300) + printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); + else + printk("/%d\n", (FPU_modrm >> 3) & 7); + } + else + { + printk("cs selector = %04x\n", FPU_CS); + } + + RE_ENTRANT_CHECK_ON; + + EXCEPTION(EX_Invalid); + +} + + +/* + Called for opcodes which are illegal and which are known to result in a + SIGILL with a real 80486. + */ +void FPU_illegal(void) +{ + math_abort(FPU_info,SIGILL); +} + + + +void emu_printall() +{ + int i; + static char *tag_desc[] = { "Valid", "Zero", "ERROR", "ERROR", + "DeNorm", "Inf", "NaN", "Empty" }; + unsigned char byte1, FPU_modrm; + unsigned long address = FPU_ORIG_EIP; + + RE_ENTRANT_CHECK_OFF; + /* No need to verify_area(), we have previously fetched these bytes. */ + printk("At %p:", (void *) address); + if ( FPU_CS == USER_CS ) + { +#define MAX_PRINTED_BYTES 20 + for ( i = 0; i < MAX_PRINTED_BYTES; i++ ) + { + byte1 = get_fs_byte((unsigned char *) address); + if ( (byte1 & 0xf8) == 0xd8 ) + { + printk(" %02x", byte1); + break; + } + printk(" [%02x]", byte1); + address++; + } + if ( i == MAX_PRINTED_BYTES ) + printk(" [more..]\n"); + else + { + FPU_modrm = get_fs_byte(1 + (unsigned char *) address); + + if (FPU_modrm >= 0300) + printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); + else + printk(" /%d, mod=%d rm=%d\n", + (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7); + } + } + else + { + printk("%04x\n", FPU_CS); + } + + partial_status = status_word(); + +#ifdef DEBUGGING +if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n"); +if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n"); +if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n"); +if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n"); +if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n"); +if ( partial_status & SW_Summary ) printk("SW: exception summary\n"); +if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n"); +if ( partial_status & SW_Precision ) printk("SW: loss of precision\n"); +if ( partial_status & SW_Underflow ) printk("SW: underflow\n"); +if ( partial_status & SW_Overflow ) printk("SW: overflow\n"); +if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n"); +if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n"); +if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n"); +#endif DEBUGGING + + printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", + partial_status & 0x8000 ? 1 : 0, /* busy */ + (partial_status & 0x3800) >> 11, /* stack top pointer */ + partial_status & 0x80 ? 1 : 0, /* Error summary status */ + partial_status & 0x40 ? 1 : 0, /* Stack flag */ + partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */ + partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */ + partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0, + partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0, + partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0); + +printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n", + control_word & 0x1000 ? 1 : 0, + (control_word & 0x800) >> 11, (control_word & 0x400) >> 10, + (control_word & 0x200) >> 9, (control_word & 0x100) >> 8, + control_word & 0x80 ? 1 : 0, + control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0, + control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0, + control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0); + + for ( i = 0; i < 8; i++ ) + { + FPU_REG *r = &st(i); + switch (r->tag) + { + case TW_Empty: + continue; + break; + case TW_Zero: +#if 0 + printk("st(%d) %c .0000 0000 0000 0000 ", + i, r->sign ? '-' : '+'); + break; +#endif + case TW_Valid: + case TW_NaN: +/* case TW_Denormal: */ + case TW_Infinity: + printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6ld ", i, + r->sign ? '-' : '+', + (long)(r->sigh >> 16), + (long)(r->sigh & 0xFFFF), + (long)(r->sigl >> 16), + (long)(r->sigl & 0xFFFF), + r->exp - EXP_BIAS + 1); + break; + default: + printk("Whoops! Error in errors.c "); + break; + } + printk("%s\n", tag_desc[(int) (unsigned) r->tag]); + } + +#ifdef OBSOLETE + printk("[data] %c .%04lx %04lx %04lx %04lx e%+-6ld ", + FPU_loaded_data.sign ? '-' : '+', + (long)(FPU_loaded_data.sigh >> 16), + (long)(FPU_loaded_data.sigh & 0xFFFF), + (long)(FPU_loaded_data.sigl >> 16), + (long)(FPU_loaded_data.sigl & 0xFFFF), + FPU_loaded_data.exp - EXP_BIAS + 1); + printk("%s\n", tag_desc[(int) (unsigned) FPU_loaded_data.tag]); +#endif OBSOLETE + RE_ENTRANT_CHECK_ON; + +} + +static struct { + int type; + char *name; +} exception_names[] = { + { EX_StackOver, "stack overflow" }, + { EX_StackUnder, "stack underflow" }, + { EX_Precision, "loss of precision" }, + { EX_Underflow, "underflow" }, + { EX_Overflow, "overflow" }, + { EX_ZeroDiv, "divide by zero" }, + { EX_Denormal, "denormalized operand" }, + { EX_Invalid, "invalid operation" }, + { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION }, + { 0, NULL } +}; + +/* + EX_INTERNAL is always given with a code which indicates where the + error was detected. + + Internal error types: + 0x14 in fpu_etc.c + 0x1nn in a *.c file: + 0x101 in reg_add_sub.c + 0x102 in reg_mul.c + 0x104 in poly_atan.c + 0x105 in reg_mul.c + 0x107 in fpu_trig.c + 0x108 in reg_compare.c + 0x109 in reg_compare.c + 0x110 in reg_add_sub.c + 0x111 in fpe_entry.c + 0x112 in fpu_trig.c + 0x113 in errors.c + 0x115 in fpu_trig.c + 0x116 in fpu_trig.c + 0x117 in fpu_trig.c + 0x118 in fpu_trig.c + 0x119 in fpu_trig.c + 0x120 in poly_atan.c + 0x121 in reg_compare.c + 0x122 in reg_compare.c + 0x123 in reg_compare.c + 0x125 in fpu_trig.c + 0x126 in fpu_entry.c + 0x127 in poly_2xm1.c + 0x128 in fpu_entry.c + 0x129 in fpu_entry.c + 0x130 in get_address.c + 0x131 in get_address.c + 0x132 in get_address.c + 0x133 in get_address.c + 0x140 in load_store.c + 0x141 in load_store.c + 0x150 in poly_sin.c + 0x151 in poly_sin.c + 0x160 in reg_ld_str.c + 0x161 in reg_ld_str.c + 0x162 in reg_ld_str.c + 0x163 in reg_ld_str.c + 0x2nn in an *.S file: + 0x201 in reg_u_add.S + 0x202 in reg_u_div.S + 0x203 in reg_u_div.S + 0x204 in reg_u_div.S + 0x205 in reg_u_mul.S + 0x206 in reg_u_sub.S + 0x207 in wm_sqrt.S + 0x208 in reg_div.S + 0x209 in reg_u_sub.S + 0x210 in reg_u_sub.S + 0x211 in reg_u_sub.S + 0x212 in reg_u_sub.S + 0x213 in wm_sqrt.S + 0x214 in wm_sqrt.S + 0x215 in wm_sqrt.S + 0x220 in reg_norm.S + 0x221 in reg_norm.S + 0x230 in reg_round.S + 0x231 in reg_round.S + 0x232 in reg_round.S + 0x233 in reg_round.S + 0x234 in reg_round.S + 0x235 in reg_round.S + 0x236 in reg_round.S + 0x240 in div_Xsig.S + 0x241 in div_Xsig.S + 0x242 in div_Xsig.S + */ + +void exception(int n) +{ + int i, int_type; + + int_type = 0; /* Needed only to stop compiler warnings */ + if ( n & EX_INTERNAL ) + { + int_type = n - EX_INTERNAL; + n = EX_INTERNAL; + /* Set lots of exception bits! */ + partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward); + } + else + { + /* Extract only the bits which we use to set the status word */ + n &= (SW_Exc_Mask); + /* Set the corresponding exception bit */ + partial_status |= n; + /* Set summary bits iff exception isn't masked */ + if ( partial_status & ~control_word & CW_Exceptions ) + partial_status |= (SW_Summary | SW_Backward); + if ( n & (SW_Stack_Fault | EX_Precision) ) + { + if ( !(n & SW_C1) ) + /* This bit distinguishes over- from underflow for a stack fault, + and roundup from round-down for precision loss. */ + partial_status &= ~SW_C1; + } + } + + RE_ENTRANT_CHECK_OFF; + if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) ) + { +#ifdef PRINT_MESSAGES + /* My message from the sponsor */ + printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n"); +#endif PRINT_MESSAGES + + /* Get a name string for error reporting */ + for (i=0; exception_names[i].type; i++) + if ( (exception_names[i].type & n) == exception_names[i].type ) + break; + + if (exception_names[i].type) + { +#ifdef PRINT_MESSAGES + printk("FP Exception: %s!\n", exception_names[i].name); +#endif PRINT_MESSAGES + } + else + printk("FPU emulator: Unknown Exception: 0x%04x!\n", n); + + if ( n == EX_INTERNAL ) + { + printk("FPU emulator: Internal error type 0x%04x\n", int_type); + emu_printall(); + } +#ifdef PRINT_MESSAGES + else + emu_printall(); +#endif PRINT_MESSAGES + + /* + * The 80486 generates an interrupt on the next non-control FPU + * instruction. So we need some means of flagging it. + * We use the ES (Error Summary) bit for this, assuming that + * this is the way a real FPU does it (until I can check it out), + * if not, then some method such as the following kludge might + * be needed. + */ +/* regs[0].tag |= TW_FPU_Interrupt; */ + } + RE_ENTRANT_CHECK_ON; + +#ifdef __DEBUG__ + math_abort(FPU_info,SIGFPE); +#endif __DEBUG__ + +} + + +/* Real operation attempted on two operands, one a NaN. */ +/* Returns nz if the exception is unmasked */ +asmlinkage int real_2op_NaN(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest) +{ + FPU_REG const *x; + int signalling; + + /* The default result for the case of two "equal" NaNs (signs may + differ) is chosen to reproduce 80486 behaviour */ + x = a; + if (a->tag == TW_NaN) + { + if (b->tag == TW_NaN) + { + signalling = !(a->sigh & b->sigh & 0x40000000); + /* find the "larger" */ + if ( significand(a) < significand(b) ) + x = b; + } + else + { + /* return the quiet version of the NaN in a */ + signalling = !(a->sigh & 0x40000000); + } + } + else +#ifdef PARANOID + if (b->tag == TW_NaN) +#endif PARANOID + { + signalling = !(b->sigh & 0x40000000); + x = b; + } +#ifdef PARANOID + else + { + signalling = 0; + EXCEPTION(EX_INTERNAL|0x113); + x = &CONST_QNaN; + } +#endif PARANOID + + if ( !signalling ) + { + if ( !(x->sigh & 0x80000000) ) /* pseudo-NaN ? */ + x = &CONST_QNaN; + reg_move(x, dest); + return 0; + } + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + if ( !(x->sigh & 0x80000000) ) /* pseudo-NaN ? */ + x = &CONST_QNaN; + reg_move(x, dest); + /* ensure a Quiet NaN */ + dest->sigh |= 0x40000000; + } + + EXCEPTION(EX_Invalid); + + return !(control_word & CW_Invalid); +} + + +/* Invalid arith operation on Valid registers */ +/* Returns nz if the exception is unmasked */ +asmlinkage int arith_invalid(FPU_REG *dest) +{ + + EXCEPTION(EX_Invalid); + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + reg_move(&CONST_QNaN, dest); + } + + return !(control_word & CW_Invalid); + +} + + +/* Divide a finite number by zero */ +asmlinkage int divide_by_zero(int sign, FPU_REG *dest) +{ + + if ( control_word & CW_ZeroDiv ) + { + /* The masked response */ + reg_move(&CONST_INF, dest); + dest->sign = (unsigned char)sign; + } + + EXCEPTION(EX_ZeroDiv); + + return !(control_word & CW_ZeroDiv); + +} + + +/* This may be called often, so keep it lean */ +int set_precision_flag(int flags) +{ + if ( control_word & CW_Precision ) + { + partial_status &= ~(SW_C1 & flags); + partial_status |= flags; /* The masked response */ + return 0; + } + else + { + exception(flags); + return 1; + } +} + + +/* This may be called often, so keep it lean */ +asmlinkage void set_precision_flag_up(void) +{ + if ( control_word & CW_Precision ) + partial_status |= (SW_Precision | SW_C1); /* The masked response */ + else + exception(EX_Precision | SW_C1); + +} + + +/* This may be called often, so keep it lean */ +asmlinkage void set_precision_flag_down(void) +{ + if ( control_word & CW_Precision ) + { /* The masked response */ + partial_status &= ~SW_C1; + partial_status |= SW_Precision; + } + else + exception(EX_Precision); +} + + +asmlinkage int denormal_operand(void) +{ + if ( control_word & CW_Denormal ) + { /* The masked response */ + partial_status |= SW_Denorm_Op; + return 0; + } + else + { + exception(EX_Denormal); + return 1; + } +} + + +asmlinkage int arith_overflow(FPU_REG *dest) +{ + + if ( control_word & CW_Overflow ) + { + char sign; + /* The masked response */ +/* ###### The response here depends upon the rounding mode */ + sign = dest->sign; + reg_move(&CONST_INF, dest); + dest->sign = sign; + } + else + { + /* Subtract the magic number from the exponent */ + dest->exp -= (3 * (1 << 13)); + } + + EXCEPTION(EX_Overflow); + if ( control_word & CW_Overflow ) + { + /* The overflow exception is masked. */ + /* By definition, precision is lost. + The roundup bit (C1) is also set because we have + "rounded" upwards to Infinity. */ + EXCEPTION(EX_Precision | SW_C1); + return !(control_word & CW_Precision); + } + + return !(control_word & CW_Overflow); + +} + + +asmlinkage int arith_underflow(FPU_REG *dest) +{ + + if ( control_word & CW_Underflow ) + { + /* The masked response */ + if ( dest->exp <= EXP_UNDER - 63 ) + { + reg_move(&CONST_Z, dest); + partial_status &= ~SW_C1; /* Round down. */ + } + } + else + { + /* Add the magic number to the exponent. */ + dest->exp += (3 * (1 << 13)); + } + + EXCEPTION(EX_Underflow); + if ( control_word & CW_Underflow ) + { + /* The underflow exception is masked. */ + EXCEPTION(EX_Precision); + return !(control_word & CW_Precision); + } + + return !(control_word & CW_Underflow); + +} + + +void stack_overflow(void) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + top--; + reg_move(&CONST_QNaN, &st(0)); + } + + EXCEPTION(EX_StackOver); + + return; + +} + + +void stack_underflow(void) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + reg_move(&CONST_QNaN, &st(0)); + } + + EXCEPTION(EX_StackUnder); + + return; + +} + + +void stack_underflow_i(int i) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + reg_move(&CONST_QNaN, &(st(i))); + } + + EXCEPTION(EX_StackUnder); + + return; + +} + + +void stack_underflow_pop(int i) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + reg_move(&CONST_QNaN, &(st(i))); + pop(); + } + + EXCEPTION(EX_StackUnder); + + return; + +} + diff --git a/arch/i386/math-emu/exception.h b/arch/i386/math-emu/exception.h new file mode 100644 index 000000000..2e629a30c --- /dev/null +++ b/arch/i386/math-emu/exception.h @@ -0,0 +1,53 @@ +/*---------------------------------------------------------------------------+ + | exception.h | + | | + | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _EXCEPTION_H_ +#define _EXCEPTION_H_ + + +#ifdef __ASSEMBLER__ +#define Const_(x) $##x +#else +#define Const_(x) x +#endif + +#ifndef SW_C1 +#include "fpu_emu.h" +#endif SW_C1 + +#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */ +#define EX_ErrorSummary Const_(0x0080) /* Error summary status */ +/* Special exceptions: */ +#define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu */ +#define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow */ +#define EX_StackUnder Const_(0x0041) /* stack underflow */ +/* Exception flags: */ +#define EX_Precision Const_(0x0020) /* loss of precision */ +#define EX_Underflow Const_(0x0010) /* underflow */ +#define EX_Overflow Const_(0x0008) /* overflow */ +#define EX_ZeroDiv Const_(0x0004) /* divide by zero */ +#define EX_Denormal Const_(0x0002) /* denormalized operand */ +#define EX_Invalid Const_(0x0001) /* invalid operation */ + + +#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1)) +#define PRECISION_LOST_DOWN Const_(EX_Precision) + + +#ifndef __ASSEMBLER__ + +#ifdef DEBUG +#define EXCEPTION(x) { printk("exception in %s at line %d\n", \ + __FILE__, __LINE__); exception(x); } +#else +#define EXCEPTION(x) exception(x) +#endif + +#endif __ASSEMBLER__ + +#endif _EXCEPTION_H_ diff --git a/arch/i386/math-emu/fpu_arith.c b/arch/i386/math-emu/fpu_arith.c new file mode 100644 index 000000000..96e6bd89b --- /dev/null +++ b/arch/i386/math-emu/fpu_arith.c @@ -0,0 +1,179 @@ +/*---------------------------------------------------------------------------+ + | fpu_arith.c | + | | + | Code to implement the FPU register/register arithmetic instructions | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "status_w.h" + + +void fadd__() +{ + /* fadd st,st(i) */ + clear_C1(); + reg_add(&st(0), &st(FPU_rm), &st(0), control_word); +} + + +void fmul__() +{ + /* fmul st,st(i) */ + clear_C1(); + reg_mul(&st(0), &st(FPU_rm), &st(0), control_word); +} + + + +void fsub__() +{ + /* fsub st,st(i) */ + clear_C1(); + reg_sub(&st(0), &st(FPU_rm), &st(0), control_word); +} + + +void fsubr_() +{ + /* fsubr st,st(i) */ + clear_C1(); + reg_sub(&st(FPU_rm), &st(0), &st(0), control_word); +} + + +void fdiv__() +{ + /* fdiv st,st(i) */ + clear_C1(); + reg_div(&st(0), &st(FPU_rm), &st(0), control_word); +} + + +void fdivr_() +{ + /* fdivr st,st(i) */ + clear_C1(); + reg_div(&st(FPU_rm), &st(0), &st(0), control_word); +} + + + +void fadd_i() +{ + /* fadd st(i),st */ + clear_C1(); + reg_add(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); +} + + +void fmul_i() +{ + /* fmul st(i),st */ + clear_C1(); + reg_mul(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); +} + + +void fsubri() +{ + /* fsubr st(i),st */ + /* This is the sense of the 80486 manual + reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word); */ + clear_C1(); + reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); +} + + +void fsub_i() +{ + /* fsub st(i),st */ + /* This is the sense of the 80486 manual + reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); */ + clear_C1(); + reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word); +} + + +void fdivri() +{ + /* fdivr st(i),st */ + clear_C1(); + reg_div(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); +} + + +void fdiv_i() +{ + /* fdiv st(i),st */ + clear_C1(); + reg_div(&st(FPU_rm), &st(0), &st(FPU_rm), control_word); +} + + + +void faddp_() +{ + /* faddp st(i),st */ + clear_C1(); + if ( !reg_add(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) ) + pop(); +} + + +void fmulp_() +{ + /* fmulp st(i),st */ + clear_C1(); + if ( !reg_mul(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) ) + pop(); +} + + + +void fsubrp() +{ + /* fsubrp st(i),st */ + /* This is the sense of the 80486 manual + reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word); */ + clear_C1(); + if ( !reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) ) + pop(); +} + + +void fsubp_() +{ + /* fsubp st(i),st */ + /* This is the sense of the 80486 manual + reg_sub(&st(0), &st(FPU_rm), &st(FPU_rm), control_word); */ + clear_C1(); + if ( !reg_sub(&st(FPU_rm), &st(0), &st(FPU_rm), control_word) ) + pop(); +} + + +void fdivrp() +{ + /* fdivrp st(i),st */ + clear_C1(); + if ( !reg_div(&st(0), &st(FPU_rm), &st(FPU_rm), control_word) ) + pop(); +} + + +void fdivp_() +{ + /* fdivp st(i),st */ + clear_C1(); + if ( !reg_div(&st(FPU_rm), &st(0), &st(FPU_rm), control_word) ) + pop(); +} + diff --git a/arch/i386/math-emu/fpu_asm.h b/arch/i386/math-emu/fpu_asm.h new file mode 100644 index 000000000..8eb60148d --- /dev/null +++ b/arch/i386/math-emu/fpu_asm.h @@ -0,0 +1,30 @@ +/*---------------------------------------------------------------------------+ + | fpu_asm.h | + | | + | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _FPU_ASM_H_ +#define _FPU_ASM_H_ + +#include "fpu_emu.h" + +#define EXCEPTION _exception + + +#define PARAM1 8(%ebp) +#define PARAM2 12(%ebp) +#define PARAM3 16(%ebp) +#define PARAM4 20(%ebp) + +#define SIGL_OFFSET 8 +#define SIGN(x) (x) +#define TAG(x) 1(x) +#define EXP(x) 4(x) +#define SIG(x) SIGL_OFFSET##(x) +#define SIGL(x) SIGL_OFFSET##(x) +#define SIGH(x) 12(x) + +#endif _FPU_ASM_H_ diff --git a/arch/i386/math-emu/fpu_aux.c b/arch/i386/math-emu/fpu_aux.c new file mode 100644 index 000000000..0d35fe19b --- /dev/null +++ b/arch/i386/math-emu/fpu_aux.c @@ -0,0 +1,184 @@ +/*---------------------------------------------------------------------------+ + | fpu_aux.c | + | | + | Code to implement some of the FPU auxiliary instructions. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" + + +static void fnop(void) +{ +} + +void fclex(void) +{ + partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision| + SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op| + SW_Invalid); + no_ip_update = 1; +} + +/* Needs to be externally visible */ +void finit() +{ + int r; + control_word = 0x037f; + partial_status = 0; + top = 0; /* We don't keep top in the status word internally. */ + for (r = 0; r < 8; r++) + { + regs[r].tag = TW_Empty; + } + /* The behaviour is different to that detailed in + Section 15.1.6 of the Intel manual */ + operand_address.offset = 0; + operand_address.selector = 0; + instruction_address.offset = 0; + instruction_address.selector = 0; + instruction_address.opcode = 0; + no_ip_update = 1; +} + +/* + * These are nops on the i387.. + */ +#define feni fnop +#define fdisi fnop +#define fsetpm fnop + +static FUNC const finit_table[] = { + feni, fdisi, fclex, finit, + fsetpm, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void finit_() +{ + (finit_table[FPU_rm])(); +} + + +static void fstsw_ax(void) +{ + *(short *) &FPU_EAX = status_word(); + no_ip_update = 1; +} + +static FUNC const fstsw_table[] = { + fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal, + FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void fstsw_() +{ + (fstsw_table[FPU_rm])(); +} + + +static FUNC const fp_nop_table[] = { + fnop, FPU_illegal, FPU_illegal, FPU_illegal, + FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void fp_nop() +{ + (fp_nop_table[FPU_rm])(); +} + + +void fld_i_() +{ + FPU_REG *st_new_ptr; + + if ( STACK_OVERFLOW ) + { stack_overflow(); return; } + + /* fld st(i) */ + if ( NOT_EMPTY(FPU_rm) ) + { reg_move(&st(FPU_rm), st_new_ptr); push(); } + else + { + if ( control_word & CW_Invalid ) + { + /* The masked response */ + stack_underflow(); + } + else + EXCEPTION(EX_StackUnder); + } + +} + + +void fxch_i() +{ + /* fxch st(i) */ + FPU_REG t; + register FPU_REG *sti_ptr = &st(FPU_rm), *st0_ptr = &st(0); + + if ( st0_ptr->tag == TW_Empty ) + { + if ( sti_ptr->tag == TW_Empty ) + { + stack_underflow(); + stack_underflow_i(FPU_rm); + return; + } + if ( control_word & CW_Invalid ) + reg_move(sti_ptr, st0_ptr); /* Masked response */ + stack_underflow_i(FPU_rm); + return; + } + if ( sti_ptr->tag == TW_Empty ) + { + if ( control_word & CW_Invalid ) + reg_move(st0_ptr, sti_ptr); /* Masked response */ + stack_underflow(); + return; + } + clear_C1(); + reg_move(st0_ptr, &t); + reg_move(sti_ptr, st0_ptr); + reg_move(&t, sti_ptr); +} + + +void ffree_() +{ + /* ffree st(i) */ + st(FPU_rm).tag = TW_Empty; +} + + +void ffreep() +{ + /* ffree st(i) + pop - unofficial code */ + st(FPU_rm).tag = TW_Empty; + pop(); +} + + +void fst_i_() +{ + /* fst st(i) */ + reg_move(&st(0), &st(FPU_rm)); +} + + +void fstp_i() +{ + /* fstp st(i) */ + reg_move(&st(0), &st(FPU_rm)); + pop(); +} + diff --git a/arch/i386/math-emu/fpu_emu.h b/arch/i386/math-emu/fpu_emu.h new file mode 100644 index 000000000..9d2c5dd13 --- /dev/null +++ b/arch/i386/math-emu/fpu_emu.h @@ -0,0 +1,171 @@ +/*---------------------------------------------------------------------------+ + | fpu_emu.h | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + + +#ifndef _FPU_EMU_H_ +#define _FPU_EMU_H_ + +/* + * Define DENORM_OPERAND to make the emulator detect denormals + * and use the denormal flag of the status word. Note: this only + * affects the flag and corresponding interrupt, the emulator + * will always generate denormals and operate upon them as required. + */ +#define DENORM_OPERAND + +/* + * Define PECULIAR_486 to get a closer approximation to 80486 behaviour, + * rather than behaviour which appears to be cleaner. + * This is a matter of opinion: for all I know, the 80486 may simply + * be complying with the IEEE spec. Maybe one day I'll get to see the + * spec... + */ +#define PECULIAR_486 + +#ifdef __ASSEMBLER__ +#include "fpu_asm.h" +#define Const(x) $##x +#else +#define Const(x) x +#endif + +#define EXP_BIAS Const(0) +#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */ +#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */ +#define EXP_Infinity EXP_OVER +#define EXP_NaN EXP_OVER + +#define SIGN_POS Const(0) +#define SIGN_NEG Const(1) + +/* Keep the order TW_Valid, TW_Zero, TW_Denormal */ +#define TW_Valid Const(0) /* valid */ +#define TW_Zero Const(1) /* zero */ +/* The following fold to 2 (Special) in the Tag Word */ +/* #define TW_Denormal Const(4) */ /* De-normal */ +#define TW_Infinity Const(5) /* + or - infinity */ +#define TW_NaN Const(6) /* Not a Number */ + +#define TW_Empty Const(7) /* empty */ + + +#ifndef __ASSEMBLER__ + +#include <linux/math_emu.h> +#include <linux/linkage.h> + +/* +#define RE_ENTRANT_CHECKING + */ + +#ifdef RE_ENTRANT_CHECKING +extern char emulating; +# define RE_ENTRANT_CHECK_OFF emulating = 0 +# define RE_ENTRANT_CHECK_ON emulating = 1 +#else +# define RE_ENTRANT_CHECK_OFF +# define RE_ENTRANT_CHECK_ON +#endif RE_ENTRANT_CHECKING + +#define FWAIT_OPCODE 0x9b +#define OP_SIZE_PREFIX 0x66 +#define ADDR_SIZE_PREFIX 0x67 +#define PREFIX_CS 0x2e +#define PREFIX_DS 0x3e +#define PREFIX_ES 0x26 +#define PREFIX_SS 0x36 +#define PREFIX_FS 0x64 +#define PREFIX_GS 0x65 +#define PREFIX_REPE 0xf3 +#define PREFIX_REPNE 0xf2 +#define PREFIX_LOCK 0xf0 +#define PREFIX_CS_ 1 +#define PREFIX_DS_ 2 +#define PREFIX_ES_ 3 +#define PREFIX_FS_ 4 +#define PREFIX_GS_ 5 +#define PREFIX_SS_ 6 +#define PREFIX_DEFAULT 7 + +struct address { + unsigned int offset; + unsigned int selector:16; + unsigned int opcode:11; + unsigned int empty:5; +}; +typedef void (*FUNC)(void); +typedef struct fpu_reg FPU_REG; +typedef void (*FUNC_ST0)(FPU_REG *st0_ptr); +typedef struct { unsigned char address_size, operand_size, segment; } + overrides; +/* This structure is 32 bits: */ +typedef struct { overrides override; + unsigned char default_mode; } fpu_addr_modes; +/* PROTECTED has a restricted meaning in the emulator; it is used + to signal that the emulator needs to do special things to ensure + that protection is respected in a segmented model. */ +#define PROTECTED 4 +#define SIXTEEN 1 /* We rely upon this being 1 (true) */ +#define VM86 SIXTEEN +#define PM16 (SIXTEEN | PROTECTED) +#define SEG32 PROTECTED +extern unsigned char const data_sizes_16[32]; + +#define st(x) ( regs[((top+x) &7 )] ) + +#define STACK_OVERFLOW (st_new_ptr = &st(-1), st_new_ptr->tag != TW_Empty) +#define NOT_EMPTY(i) (st(i).tag != TW_Empty) +#define NOT_EMPTY_ST0 (st0_tag ^ TW_Empty) + +#define pop() { regs[(top++ & 7 )].tag = TW_Empty; } +#define poppop() { regs[((top + 1) & 7 )].tag \ + = regs[(top & 7 )].tag = TW_Empty; \ + top += 2; } + +/* push() does not affect the tags */ +#define push() { top--; } + + +#define reg_move(x, y) { \ + *(short *)&((y)->sign) = *(short *)&((x)->sign); \ + *(long *)&((y)->exp) = *(long *)&((x)->exp); \ + *(long long *)&((y)->sigl) = *(long long *)&((x)->sigl); } + +#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] ) + + +/*----- Prototypes for functions written in assembler -----*/ +/* extern void reg_move(FPU_REG *a, FPU_REG *b); */ + +asmlinkage void normalize(FPU_REG *x); +asmlinkage void normalize_nuo(FPU_REG *x); +asmlinkage int reg_div(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w); +asmlinkage int reg_u_sub(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w); +asmlinkage int reg_u_mul(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w); +asmlinkage int reg_u_div(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w); +asmlinkage int reg_u_add(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w); +asmlinkage int wm_sqrt(FPU_REG *n, unsigned int control_w); +asmlinkage unsigned shrx(void *l, unsigned x); +asmlinkage unsigned shrxs(void *v, unsigned x); +asmlinkage unsigned long div_small(unsigned long long *x, unsigned long y); +asmlinkage void round_reg(FPU_REG *arg, unsigned int extent, + unsigned int control_w); + +#ifndef MAKING_PROTO +#include "fpu_proto.h" +#endif + +#endif __ASSEMBLER__ + +#endif _FPU_EMU_H_ diff --git a/arch/i386/math-emu/fpu_entry.c b/arch/i386/math-emu/fpu_entry.c new file mode 100644 index 000000000..b2777a722 --- /dev/null +++ b/arch/i386/math-emu/fpu_entry.c @@ -0,0 +1,690 @@ +/*---------------------------------------------------------------------------+ + | fpu_entry.c | + | | + | The entry function for wm-FPU-emu | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | See the files "README" and "COPYING" for further copyright and warranty | + | information. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | math_emulate() is the sole entry point for wm-FPU-emu | + +---------------------------------------------------------------------------*/ + +#include <linux/signal.h> + +#include <asm/segment.h> + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "exception.h" +#include "control_w.h" +#include "status_w.h" + +#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */ + +#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */ + +/* WARNING: These codes are not documented by Intel in their 80486 manual + and may not work on FPU clones or later Intel FPUs. */ + +/* Changes to support the un-doc codes provided by Linus Torvalds. */ + +#define _d9_d8_ fstp_i /* unofficial code (19) */ +#define _dc_d0_ fcom_st /* unofficial code (14) */ +#define _dc_d8_ fcompst /* unofficial code (1c) */ +#define _dd_c8_ fxch_i /* unofficial code (0d) */ +#define _de_d0_ fcompst /* unofficial code (16) */ +#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */ +#define _df_c8_ fxch_i /* unofficial code (0f) */ +#define _df_d0_ fstp_i /* unofficial code (17) */ +#define _df_d8_ fstp_i /* unofficial code (1f) */ + +static FUNC const st_instr_table[64] = { + fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_, + fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_, + fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_, + fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_, + fsub__, fp_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, + fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, + fdiv__, trig_a, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, + fdivr_, trig_b, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, +}; + +#else /* Support only documented FPU op-codes */ + +static FUNC const st_instr_table[64] = { + fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__, + fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__, + fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__, + fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__, + fsub__, fp_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, + fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, + fdiv__, trig_a, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, + fdivr_, trig_b, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, +}; + +#endif NO_UNDOC_CODE + + +#define _NONE_ 0 /* Take no special action */ +#define _REG0_ 1 /* Need to check for not empty st(0) */ +#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */ +#define _REGi_ 0 /* Uses st(rm) */ +#define _PUSH_ 3 /* Need to check for space to push onto stack */ +#define _null_ 4 /* Function illegal or not implemented */ +#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */ +#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */ +#define _REGIc 0 /* Compare st(0) and st(rm) */ +#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */ + +#ifndef NO_UNDOC_CODE + +/* Un-documented FPU op-codes supported by default. (see above) */ + +static unsigned char const type_table[64] = { + _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_, + _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_, + _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, + _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, + _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, + _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ +}; + +#else /* Support only documented FPU op-codes */ + +static unsigned char const type_table[64] = { + _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_, + _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_, + _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_, + _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_, + _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, + _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ +}; + +#endif NO_UNDOC_CODE + + +#ifdef RE_ENTRANT_CHECKING +char emulating=0; +#endif RE_ENTRANT_CHECKING + +static int valid_prefix(unsigned char *Byte, unsigned char **fpu_eip, + overrides *override); + +asmlinkage void math_emulate(long arg) +{ + unsigned char FPU_modrm, byte1; + unsigned short code; + fpu_addr_modes addr_modes; + int unmasked; + FPU_REG loaded_data; + void *data_address; + struct address data_sel_off; + struct address entry_sel_off; + unsigned long code_base = 0; + unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ + char st0_tag; + FPU_REG *st0_ptr; + struct desc_struct code_descriptor; + +#ifdef RE_ENTRANT_CHECKING + if ( emulating ) + { + printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); + } + RE_ENTRANT_CHECK_ON; +#endif RE_ENTRANT_CHECKING + + if (!current->used_math) + { + int i; + for ( i = 0; i < 8; i++ ) + { + /* Make sure that the registers are compatible + with the assumptions of the emulator. */ + regs[i].exp = 0; + regs[i].sigh = 0x80000000; + } + finit(); + current->used_math = 1; + } + + SETUP_DATA_AREA(arg); + + FPU_ORIG_EIP = FPU_EIP; + + if ( (FPU_EFLAGS & 0x00020000) != 0 ) + { + /* Virtual 8086 mode */ + addr_modes.default_mode = VM86; + FPU_EIP += code_base = FPU_CS << 4; + code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */ + } + else if ( FPU_CS == USER_CS && FPU_DS == USER_DS ) + { + addr_modes.default_mode = 0; + } + else if ( FPU_CS == KERNEL_CS ) + { + printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP); + panic("Math emulation needed in kernel"); + } + else + { + + if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */ + { + /* Can only handle segmented addressing via the LDT + for now, and it must be 16 bit */ + printk("FPU emulator: Unsupported addressing mode\n"); + math_abort(FPU_info, SIGILL); + } + + if ( SEG_D_SIZE(code_descriptor = LDT_DESCRIPTOR(FPU_CS)) ) + { + /* The above test may be wrong, the book is not clear */ + /* Segmented 32 bit protected mode */ + addr_modes.default_mode = SEG32; + } + else + { + /* 16 bit protected mode */ + addr_modes.default_mode = PM16; + } + FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor); + code_limit = code_base + + (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor) + - 1; + if ( code_limit < code_base ) code_limit = 0xffffffff; + } + + FPU_lookahead = 1; + if (current->flags & PF_PTRACED) + FPU_lookahead = 0; + + if ( !valid_prefix(&byte1, (unsigned char **)&FPU_EIP, + &addr_modes.override) ) + { + RE_ENTRANT_CHECK_OFF; + printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n" + "FPU emulator: self-modifying code! (emulation impossible)\n", + byte1); + RE_ENTRANT_CHECK_ON; + EXCEPTION(EX_INTERNAL|0x126); + math_abort(FPU_info,SIGILL); + } + +do_another_FPU_instruction: + + no_ip_update = 0; + + FPU_EIP++; /* We have fetched the prefix and first code bytes. */ + + if ( addr_modes.default_mode ) + { + /* This checks for the minimum instruction bytes. + We also need to check any extra (address mode) code access. */ + if ( FPU_EIP > code_limit ) + math_abort(FPU_info,SIGSEGV); + } + + if ( (byte1 & 0xf8) != 0xd8 ) + { + if ( byte1 == FWAIT_OPCODE ) + { + if (partial_status & SW_Summary) + goto do_the_FPU_interrupt; + else + goto FPU_fwait_done; + } +#ifdef PARANOID + EXCEPTION(EX_INTERNAL|0x128); + math_abort(FPU_info,SIGILL); +#endif PARANOID + } + + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(1); + FPU_modrm = get_fs_byte((unsigned char *) FPU_EIP); + RE_ENTRANT_CHECK_ON; + FPU_EIP++; + + if (partial_status & SW_Summary) + { + /* Ignore the error for now if the current instruction is a no-wait + control instruction */ + /* The 80486 manual contradicts itself on this topic, + but a real 80486 uses the following instructions: + fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex. + */ + code = (FPU_modrm << 8) | byte1; + if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */ + (((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv, + fnstsw */ + ((code & 0xc000) != 0xc000))) ) ) + { + /* + * We need to simulate the action of the kernel to FPU + * interrupts here. + * Currently, the "real FPU" part of the kernel (0.99.10) + * clears the exception flags, sets the registers to empty, + * and passes information back to the interrupted process + * via the cs selector and operand selector, so we do the same. + */ + do_the_FPU_interrupt: + instruction_address.selector = status_word(); + operand_address.selector = tag_word(); + partial_status = 0; + top = 0; + { + int r; + for (r = 0; r < 8; r++) + { + regs[r].tag = TW_Empty; + } + } + + FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */ + + RE_ENTRANT_CHECK_OFF; + current->tss.trap_no = 16; + current->tss.error_code = 0; + send_sig(SIGFPE, current, 1); + return; + } + } + + entry_sel_off.offset = FPU_ORIG_EIP; + entry_sel_off.selector = FPU_CS; + entry_sel_off.opcode = (byte1 << 8) | FPU_modrm; + + FPU_rm = FPU_modrm & 7; + + if ( FPU_modrm < 0300 ) + { + /* All of these instructions use the mod/rm byte to get a data address */ + + if ( (addr_modes.default_mode & SIXTEEN) + ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) ) + data_address = get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off, + addr_modes); + else + data_address = get_address(FPU_modrm, &FPU_EIP, &data_sel_off, + addr_modes); + + if ( addr_modes.default_mode ) + { + if ( FPU_EIP-1 > code_limit ) + math_abort(FPU_info,SIGSEGV); + } + + if ( !(byte1 & 1) ) + { + unsigned short status1 = partial_status; + + st0_ptr = &st(0); + st0_tag = st0_ptr->tag; + + /* Stack underflow has priority */ + if ( NOT_EMPTY_ST0 ) + { + if ( addr_modes.default_mode & PROTECTED ) + { + /* This table works for 16 and 32 bit protected mode */ + if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] ) + math_abort(FPU_info,SIGSEGV); + } + + unmasked = 0; /* Do this here to stop compiler warnings. */ + switch ( (byte1 >> 1) & 3 ) + { + case 0: + unmasked = reg_load_single((float *)data_address, + &loaded_data); + break; + case 1: + reg_load_int32((long *)data_address, &loaded_data); + break; + case 2: + unmasked = reg_load_double((double *)data_address, + &loaded_data); + break; + case 3: + reg_load_int16((short *)data_address, &loaded_data); + break; + } + + /* No more access to user memory, it is safe + to use static data now */ + + /* NaN operands have the next priority. */ + /* We have to delay looking at st(0) until after + loading the data, because that data might contain an SNaN */ + if ( (st0_tag == TW_NaN) || + (loaded_data.tag == TW_NaN) ) + { + /* Restore the status word; we might have loaded a + denormal. */ + partial_status = status1; + if ( (FPU_modrm & 0x30) == 0x10 ) + { + /* fcom or fcomp */ + EXCEPTION(EX_Invalid); + setcc(SW_C3 | SW_C2 | SW_C0); + if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) + pop(); /* fcomp, masked, so we pop. */ + } + else + { +#ifdef PECULIAR_486 + /* This is not really needed, but gives behaviour + identical to an 80486 */ + if ( (FPU_modrm & 0x28) == 0x20 ) + /* fdiv or fsub */ + real_2op_NaN(&loaded_data, st0_ptr, + st0_ptr); + else +#endif PECULIAR_486 + /* fadd, fdivr, fmul, or fsubr */ + real_2op_NaN(st0_ptr, &loaded_data, + st0_ptr); + } + goto reg_mem_instr_done; + } + + if ( unmasked && !((FPU_modrm & 0x30) == 0x10) ) + { + /* Is not a comparison instruction. */ + if ( (FPU_modrm & 0x38) == 0x38 ) + { + /* fdivr */ + if ( (st0_tag == TW_Zero) && + (loaded_data.tag == TW_Valid) ) + { + if ( divide_by_zero(loaded_data.sign, + st0_ptr) ) + { + /* We use the fact here that the unmasked + exception in the loaded data was for a + denormal operand */ + /* Restore the state of the denormal op bit */ + partial_status &= ~SW_Denorm_Op; + partial_status |= status1 & SW_Denorm_Op; + } + } + } + goto reg_mem_instr_done; + } + + switch ( (FPU_modrm >> 3) & 7 ) + { + case 0: /* fadd */ + clear_C1(); + reg_add(st0_ptr, &loaded_data, st0_ptr, + control_word); + break; + case 1: /* fmul */ + clear_C1(); + reg_mul(st0_ptr, &loaded_data, st0_ptr, + control_word); + break; + case 2: /* fcom */ + compare_st_data(&loaded_data); + break; + case 3: /* fcomp */ + if ( !compare_st_data(&loaded_data) && !unmasked ) + pop(); + break; + case 4: /* fsub */ + clear_C1(); + reg_sub(st0_ptr, &loaded_data, st0_ptr, + control_word); + break; + case 5: /* fsubr */ + clear_C1(); + reg_sub(&loaded_data, st0_ptr, st0_ptr, + control_word); + break; + case 6: /* fdiv */ + clear_C1(); + reg_div(st0_ptr, &loaded_data, st0_ptr, + control_word); + break; + case 7: /* fdivr */ + clear_C1(); + if ( st0_tag == TW_Zero ) + partial_status = status1; /* Undo any denorm tag, + zero-divide has priority. */ + reg_div(&loaded_data, st0_ptr, st0_ptr, + control_word); + break; + } + } + else + { + if ( (FPU_modrm & 0x30) == 0x10 ) + { + /* The instruction is fcom or fcomp */ + EXCEPTION(EX_StackUnder); + setcc(SW_C3 | SW_C2 | SW_C0); + if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) + pop(); /* fcomp */ + } + else + stack_underflow(); + } + reg_mem_instr_done: + operand_address = data_sel_off; + } + else + { + if ( !(no_ip_update = + load_store_instr(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1, + addr_modes, data_address)) ) + { + operand_address = data_sel_off; + } + } + + } + else + { + /* None of these instructions access user memory */ + unsigned char instr_index = (FPU_modrm & 0x38) | (byte1 & 7); + +#ifdef PECULIAR_486 + /* This is supposed to be undefined, but a real 80486 seems + to do this: */ + operand_address.offset = 0; + operand_address.selector = FPU_DS; +#endif PECULIAR_486 + + st0_ptr = &st(0); + st0_tag = st0_ptr->tag; + switch ( type_table[(int) instr_index] ) + { + case _NONE_: /* also _REGIc: _REGIn */ + break; + case _REG0_: + if ( !NOT_EMPTY_ST0 ) + { + stack_underflow(); + goto FPU_instruction_done; + } + break; + case _REGIi: + if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) + { + stack_underflow_i(FPU_rm); + goto FPU_instruction_done; + } + break; + case _REGIp: + if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) + { + stack_underflow_pop(FPU_rm); + goto FPU_instruction_done; + } + break; + case _REGI_: + if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) + { + stack_underflow(); + goto FPU_instruction_done; + } + break; + case _PUSH_: /* Only used by the fld st(i) instruction */ + break; + case _null_: + FPU_illegal(); + goto FPU_instruction_done; + default: + EXCEPTION(EX_INTERNAL|0x111); + goto FPU_instruction_done; + } + (*st_instr_table[(int) instr_index])(); + +FPU_instruction_done: + ; + } + + if ( ! no_ip_update ) + instruction_address = entry_sel_off; + +FPU_fwait_done: + +#ifdef DEBUG + RE_ENTRANT_CHECK_OFF; + emu_printall(); + RE_ENTRANT_CHECK_ON; +#endif DEBUG + + if (FPU_lookahead && !need_resched) + { + FPU_ORIG_EIP = FPU_EIP - code_base; + if ( valid_prefix(&byte1, (unsigned char **)&FPU_EIP, + &addr_modes.override) ) + goto do_another_FPU_instruction; + } + + if ( addr_modes.default_mode ) + FPU_EIP -= code_base; + + RE_ENTRANT_CHECK_OFF; +} + + +/* Support for prefix bytes is not yet complete. To properly handle + all prefix bytes, further changes are needed in the emulator code + which accesses user address space. Access to separate segments is + important for msdos emulation. */ +static int valid_prefix(unsigned char *Byte, unsigned char **fpu_eip, + overrides *override) +{ + unsigned char byte; + unsigned char *ip = *fpu_eip; + + *override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */ + + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(1); + byte = get_fs_byte(ip); + RE_ENTRANT_CHECK_ON; + + while ( 1 ) + { + switch ( byte ) + { + case ADDR_SIZE_PREFIX: + override->address_size = ADDR_SIZE_PREFIX; + goto do_next_byte; + + case OP_SIZE_PREFIX: + override->operand_size = OP_SIZE_PREFIX; + goto do_next_byte; + + case PREFIX_CS: + override->segment = PREFIX_CS_; + goto do_next_byte; + case PREFIX_ES: + override->segment = PREFIX_ES_; + goto do_next_byte; + case PREFIX_SS: + override->segment = PREFIX_SS_; + goto do_next_byte; + case PREFIX_FS: + override->segment = PREFIX_FS_; + goto do_next_byte; + case PREFIX_GS: + override->segment = PREFIX_GS_; + goto do_next_byte; + case PREFIX_DS: + override->segment = PREFIX_DS_; + goto do_next_byte; + +/* lock is not a valid prefix for FPU instructions, + let the cpu handle it to generate a SIGILL. */ +/* case PREFIX_LOCK: */ + + /* rep.. prefixes have no meaning for FPU instructions */ + case PREFIX_REPE: + case PREFIX_REPNE: + + do_next_byte: + ip++; + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(1); + byte = get_fs_byte(ip); + RE_ENTRANT_CHECK_ON; + break; + case FWAIT_OPCODE: + *Byte = byte; + return 1; + default: + if ( (byte & 0xf8) == 0xd8 ) + { + *Byte = byte; + *fpu_eip = ip; + return 1; + } + else + { + /* Not a valid sequence of prefix bytes followed by + an FPU instruction. */ + *Byte = byte; /* Needed for error message. */ + return 0; + } + } + } +} + + +void math_abort(struct info * info, unsigned int signal) +{ + FPU_EIP = FPU_ORIG_EIP; + current->tss.trap_no = 16; + current->tss.error_code = 0; + send_sig(signal,current,1); + RE_ENTRANT_CHECK_OFF; + __asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4)); +#ifdef PARANOID + printk("ERROR: wm-FPU-emu math_abort failed!\n"); +#endif PARANOID +} diff --git a/arch/i386/math-emu/fpu_etc.c b/arch/i386/math-emu/fpu_etc.c new file mode 100644 index 000000000..20e3294ca --- /dev/null +++ b/arch/i386/math-emu/fpu_etc.c @@ -0,0 +1,129 @@ +/*---------------------------------------------------------------------------+ + | fpu_etc.c | + | | + | Implement a few FPU instructions. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "reg_constant.h" + + +static void fchs(FPU_REG *st0_ptr) +{ + if ( st0_ptr->tag ^ TW_Empty ) + { + st0_ptr->sign ^= SIGN_POS^SIGN_NEG; + clear_C1(); + } + else + stack_underflow(); +} + +static void fabs(FPU_REG *st0_ptr) +{ + if ( st0_ptr->tag ^ TW_Empty ) + { + st0_ptr->sign = SIGN_POS; + clear_C1(); + } + else + stack_underflow(); +} + + +static void ftst_(FPU_REG *st0_ptr) +{ + switch (st0_ptr->tag) + { + case TW_Zero: + setcc(SW_C3); + break; + case TW_Valid: + if (st0_ptr->sign == SIGN_POS) + setcc(0); + else + setcc(SW_C0); + +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + { +#ifdef PECULIAR_486 + /* This is weird! */ + if (st0_ptr->sign == SIGN_POS) + setcc(SW_C3); +#endif PECULIAR_486 + return; + } +#endif DENORM_OPERAND + + break; + case TW_NaN: + setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ + EXCEPTION(EX_Invalid); + break; + case TW_Infinity: + if (st0_ptr->sign == SIGN_POS) + setcc(0); + else + setcc(SW_C0); + break; + case TW_Empty: + setcc(SW_C0|SW_C2|SW_C3); + EXCEPTION(EX_StackUnder); + break; + default: + setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ + EXCEPTION(EX_INTERNAL|0x14); + break; + } +} + +static void fxam(FPU_REG *st0_ptr) +{ + int c=0; + switch (st0_ptr->tag) + { + case TW_Empty: + c = SW_C3|SW_C0; + break; + case TW_Zero: + c = SW_C3; + break; + case TW_Valid: + /* This will need to be changed if TW_Denormal is ever used. */ + if ( st0_ptr->exp <= EXP_UNDER ) + c = SW_C2|SW_C3; /* Denormal */ + else + c = SW_C2; + break; + case TW_NaN: + c = SW_C0; + break; + case TW_Infinity: + c = SW_C2|SW_C0; + break; + } + if (st0_ptr->sign == SIGN_NEG) + c |= SW_C1; + setcc(c); +} + + +static FUNC_ST0 const fp_etc_table[] = { + fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal, + ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal +}; + +void fp_etc() +{ + (fp_etc_table[FPU_rm])(&st(0)); +} diff --git a/arch/i386/math-emu/fpu_proto.h b/arch/i386/math-emu/fpu_proto.h new file mode 100644 index 000000000..b4392fe57 --- /dev/null +++ b/arch/i386/math-emu/fpu_proto.h @@ -0,0 +1,137 @@ +/* errors.c */ +extern void Un_impl(void); +extern void FPU_illegal(void); +extern void emu_printall(void); +extern void stack_overflow(void); +extern void stack_underflow(void); +extern void stack_underflow_i(int i); +extern void stack_underflow_pop(int i); +extern int set_precision_flag(int flags); +asmlinkage void exception(int n); +asmlinkage int real_2op_NaN(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest); +asmlinkage int arith_invalid(FPU_REG *dest); +asmlinkage int divide_by_zero(int sign, FPU_REG *dest); +asmlinkage void set_precision_flag_up(void); +asmlinkage void set_precision_flag_down(void); +asmlinkage int denormal_operand(void); +asmlinkage int arith_overflow(FPU_REG *dest); +asmlinkage int arith_underflow(FPU_REG *dest); + +/* fpu_arith.c */ +extern void fadd__(void); +extern void fmul__(void); +extern void fsub__(void); +extern void fsubr_(void); +extern void fdiv__(void); +extern void fdivr_(void); +extern void fadd_i(void); +extern void fmul_i(void); +extern void fsubri(void); +extern void fsub_i(void); +extern void fdivri(void); +extern void fdiv_i(void); +extern void faddp_(void); +extern void fmulp_(void); +extern void fsubrp(void); +extern void fsubp_(void); +extern void fdivrp(void); +extern void fdivp_(void); + +/* fpu_aux.c */ +extern void fclex(void); +extern void finit(void); +extern void finit_(void); +extern void fstsw_(void); +extern void fp_nop(void); +extern void fld_i_(void); +extern void fxch_i(void); +extern void ffree_(void); +extern void ffreep(void); +extern void fst_i_(void); +extern void fstp_i(void); + +/* fpu_entry.c */ +asmlinkage void math_emulate(long arg); +extern void math_abort(struct info *info, unsigned int signal); + +/* fpu_etc.c */ +extern void fp_etc(void); + +/* fpu_trig.c */ +extern void convert_l2reg(long const *arg, FPU_REG *dest); +extern void trig_a(void); +extern void trig_b(void); + +/* get_address.c */ +extern void *get_address(unsigned char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, + fpu_addr_modes); +extern void *get_address_16(unsigned char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, + fpu_addr_modes); + +/* load_store.c */ +extern int load_store_instr(unsigned char type, fpu_addr_modes addr_modes, + void *address); + +/* poly_2xm1.c */ +extern int poly_2xm1(FPU_REG const *arg, FPU_REG *result); + +/* poly_atan.c */ +extern void poly_atan(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *result); + +/* poly_l2.c */ +extern void poly_l2(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result); +extern int poly_l2p1(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result); + +/* poly_sin.c */ +extern void poly_sine(FPU_REG const *arg, FPU_REG *result); +extern void poly_cos(FPU_REG const *arg, FPU_REG *result); + +/* poly_tan.c */ +extern void poly_tan(FPU_REG const *arg, FPU_REG *result); + +/* reg_add_sub.c */ +extern int reg_add(FPU_REG const *a, FPU_REG const *b, + FPU_REG *dest, int control_w); +extern int reg_sub(FPU_REG const *a, FPU_REG const *b, + FPU_REG *dest, int control_w); + +/* reg_compare.c */ +extern int compare(FPU_REG const *b); +extern int compare_st_data(FPU_REG const *b); +extern void fcom_st(void); +extern void fcompst(void); +extern void fcompp(void); +extern void fucom_(void); +extern void fucomp(void); +extern void fucompp(void); + +/* reg_constant.c */ +extern void fconst(void); + +/* reg_ld_str.c */ +extern int reg_load_extended(long double *addr, FPU_REG *loaded_data); +extern int reg_load_double(double *dfloat, FPU_REG *loaded_data); +extern int reg_load_single(float *single, FPU_REG *loaded_data); +extern void reg_load_int64(long long *_s, FPU_REG *loaded_data); +extern void reg_load_int32(long *_s, FPU_REG *loaded_data); +extern void reg_load_int16(short *_s, FPU_REG *loaded_data); +extern void reg_load_bcd(char *s, FPU_REG *loaded_data); +extern int reg_store_extended(long double *d, FPU_REG *st0_ptr); +extern int reg_store_double(double *dfloat, FPU_REG *st0_ptr); +extern int reg_store_single(float *single, FPU_REG *st0_ptr); +extern int reg_store_int64(long long *d, FPU_REG *st0_ptr); +extern int reg_store_int32(long *d, FPU_REG *st0_ptr); +extern int reg_store_int16(short *d, FPU_REG *st0_ptr); +extern int reg_store_bcd(char *d, FPU_REG *st0_ptr); +extern int round_to_int(FPU_REG *r); +extern char *fldenv(fpu_addr_modes addr_modes, char *address); +extern void frstor(fpu_addr_modes addr_modes, char *address); +extern unsigned short tag_word(void); +extern char *fstenv(fpu_addr_modes addr_modes, char *address); +extern void fsave(fpu_addr_modes addr_modes, char *address); + +/* reg_mul.c */ +extern int reg_mul(FPU_REG const *a, FPU_REG const *b, + FPU_REG *dest, unsigned int control_w); diff --git a/arch/i386/math-emu/fpu_system.h b/arch/i386/math-emu/fpu_system.h new file mode 100644 index 000000000..d2c3fa716 --- /dev/null +++ b/arch/i386/math-emu/fpu_system.h @@ -0,0 +1,83 @@ +/*---------------------------------------------------------------------------+ + | fpu_system.h | + | | + | Copyright (C) 1992,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _FPU_SYSTEM_H +#define _FPU_SYSTEM_H + +/* system dependent definitions */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> + +/* This sets the pointer FPU_info to point to the argument part + of the stack frame of math_emulate() */ +#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg + +#define LDT_DESCRIPTOR(s) (current->ldt[(s) >> 3]) +#define SEG_D_SIZE(x) ((x).b & (3 << 21)) +#define SEG_G_BIT(x) ((x).b & (1 << 23)) +#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) +#define SEG_286_MODE(x) ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23))) +#define SEG_BASE_ADDR(s) (((s).b & 0xff000000) \ + | (((s).b & 0xff) << 16) | ((s).a >> 16)) +#define SEG_LIMIT(s) (((s).b & 0xff0000) | ((s).a & 0xffff)) +#define SEG_EXECUTE_ONLY(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11)) +#define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9)) +#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ + == (1 << 10)) + +#define I387 (current->tss.i387) +#define FPU_info (I387.soft.info) + +#define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) +#define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) +#define FPU_DS (*(unsigned short *) &(FPU_info->___ds)) +#define FPU_EAX (FPU_info->___eax) +#define FPU_EFLAGS (FPU_info->___eflags) +#define FPU_EIP (FPU_info->___eip) +#define FPU_ORIG_EIP (FPU_info->___orig_eip) + +#define FPU_lookahead (I387.soft.lookahead) + +/* nz if ip_offset and cs_selector are not to be set for the current + instruction. */ +#define no_ip_update (((char *)&(I387.soft.twd))[0]) +#define FPU_rm (((unsigned char *)&(I387.soft.twd))[1]) + +/* Number of bytes of data which can be legally accessed by the current + instruction. This only needs to hold a number <= 108, so a byte will do. */ +#define access_limit (((unsigned char *)&(I387.soft.twd))[2]) + +#define partial_status (I387.soft.swd) +#define control_word (I387.soft.cwd) +#define regs (I387.soft.regs) +#define top (I387.soft.top) + +#define instruction_address (*(struct address *)&I387.soft.fip) +#define operand_address (*(struct address *)&I387.soft.foo) + +#define FPU_verify_area(x,y,z) if ( verify_area(x,y,z) ) \ + math_abort(FPU_info,SIGSEGV) + +#undef FPU_IGNORE_CODE_SEGV +#ifdef FPU_IGNORE_CODE_SEGV +/* verify_area() is very expensive, and causes the emulator to run + about 20% slower if applied to the code. Anyway, errors due to bad + code addresses should be much rarer than errors due to bad data + addresses. */ +#define FPU_code_verify_area(z) +#else +/* A simpler test than verify_area() can probably be done for + FPU_code_verify_area() because the only possible error is to step + past the upper boundary of a legal code area. */ +#define FPU_code_verify_area(z) FPU_verify_area(VERIFY_READ,(void *)FPU_EIP,z) +#endif + +#endif diff --git a/arch/i386/math-emu/fpu_trig.c b/arch/i386/math-emu/fpu_trig.c new file mode 100644 index 000000000..05241f700 --- /dev/null +++ b/arch/i386/math-emu/fpu_trig.c @@ -0,0 +1,1718 @@ +/*---------------------------------------------------------------------------+ + | fpu_trig.c | + | | + | Implementation of the FPU "transcendental" functions. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" +#include "reg_constant.h" + + +static void rem_kernel(unsigned long long st0, unsigned long long *y, + unsigned long long st1, + unsigned long long q, int n); + +#define BETTER_THAN_486 + +#define FCOS 4 +/* Not needed now with new code +#define FPTAN 1 + */ + +/* Used only by fptan, fsin, fcos, and fsincos. */ +/* This routine produces very accurate results, similar to + using a value of pi with more than 128 bits precision. */ +/* Limited measurements show no results worse than 64 bit precision + except for the results for arguments close to 2^63, where the + precision of the result sometimes degrades to about 63.9 bits */ +static int trig_arg(FPU_REG *X, int even) +{ + FPU_REG tmp; + unsigned long long q; + int old_cw = control_word, saved_status = partial_status; + + if ( X->exp >= EXP_BIAS + 63 ) + { + partial_status |= SW_C2; /* Reduction incomplete. */ + return -1; + } + + control_word &= ~CW_RC; + control_word |= RC_CHOP; + + reg_div(X, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f); + round_to_int(&tmp); /* Fortunately, this can't overflow + to 2^64 */ + q = significand(&tmp); + if ( q ) + { + rem_kernel(significand(X), + &significand(&tmp), + significand(&CONST_PI2), + q, X->exp - CONST_PI2.exp); + tmp.exp = CONST_PI2.exp; + normalize(&tmp); + reg_move(&tmp, X); + } + +#ifdef FPTAN + if ( even == FPTAN ) + { + if ( ((X->exp >= EXP_BIAS) || + ((X->exp == EXP_BIAS-1) + && (X->sigh >= 0xc90fdaa2))) ^ (q & 1) ) + even = FCOS; + else + even = 0; + } +#endif FPTAN + + if ( (even && !(q & 1)) || (!even && (q & 1)) ) + { + reg_sub(&CONST_PI2, X, X, FULL_PRECISION); +#ifdef BETTER_THAN_486 + /* So far, the results are exact but based upon a 64 bit + precision approximation to pi/2. The technique used + now is equivalent to using an approximation to pi/2 which + is accurate to about 128 bits. */ + if ( (X->exp <= CONST_PI2extra.exp + 64) || (q > 1) ) + { + /* This code gives the effect of having p/2 to better than + 128 bits precision. */ + significand(&tmp) = q + 1; + tmp.exp = EXP_BIAS + 63; + tmp.tag = TW_Valid; + normalize(&tmp); + reg_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION); + reg_add(X, &tmp, X, FULL_PRECISION); + if ( X->sign == SIGN_NEG ) + { + /* CONST_PI2extra is negative, so the result of the addition + can be negative. This means that the argument is actually + in a different quadrant. The correction is always < pi/2, + so it can't overflow into yet another quadrant. */ + X->sign = SIGN_POS; + q++; + } + } +#endif BETTER_THAN_486 + } +#ifdef BETTER_THAN_486 + else + { + /* So far, the results are exact but based upon a 64 bit + precision approximation to pi/2. The technique used + now is equivalent to using an approximation to pi/2 which + is accurate to about 128 bits. */ + if ( ((q > 0) && (X->exp <= CONST_PI2extra.exp + 64)) || (q > 1) ) + { + /* This code gives the effect of having p/2 to better than + 128 bits precision. */ + significand(&tmp) = q; + tmp.exp = EXP_BIAS + 63; + tmp.tag = TW_Valid; + normalize(&tmp); + reg_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION); + reg_sub(X, &tmp, X, FULL_PRECISION); + if ( (X->exp == CONST_PI2.exp) && + ((X->sigh > CONST_PI2.sigh) + || ((X->sigh == CONST_PI2.sigh) + && (X->sigl > CONST_PI2.sigl))) ) + { + /* CONST_PI2extra is negative, so the result of the + subtraction can be larger than pi/2. This means + that the argument is actually in a different quadrant. + The correction is always < pi/2, so it can't overflow + into yet another quadrant. */ + reg_sub(&CONST_PI, X, X, FULL_PRECISION); + q++; + } + } + } +#endif BETTER_THAN_486 + + control_word = old_cw; + partial_status = saved_status & ~SW_C2; /* Reduction complete. */ + + return (q & 3) | even; +} + + +/* Convert a long to register */ +void convert_l2reg(long const *arg, FPU_REG *dest) +{ + long num = *arg; + + if (num == 0) + { reg_move(&CONST_Z, dest); return; } + + if (num > 0) + dest->sign = SIGN_POS; + else + { num = -num; dest->sign = SIGN_NEG; } + + dest->sigh = num; + dest->sigl = 0; + dest->exp = EXP_BIAS + 31; + dest->tag = TW_Valid; + normalize(dest); +} + + +static void single_arg_error(FPU_REG *st0_ptr) +{ + switch ( st0_ptr->tag ) + { + case TW_NaN: + if ( !(st0_ptr->sigh & 0x40000000) ) /* Signaling ? */ + { + EXCEPTION(EX_Invalid); + if ( control_word & CW_Invalid ) + st0_ptr->sigh |= 0x40000000; /* Convert to a QNaN */ + } + break; /* return with a NaN in st(0) */ + case TW_Empty: + stack_underflow(); /* Puts a QNaN in st(0) */ + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x0112); +#endif PARANOID + } +} + + +static void single_arg_2_error(FPU_REG *st0_ptr) +{ + FPU_REG *st_new_ptr; + + switch ( st0_ptr->tag ) + { + case TW_NaN: + if ( !(st0_ptr->sigh & 0x40000000) ) /* Signaling ? */ + { + EXCEPTION(EX_Invalid); + if ( control_word & CW_Invalid ) + { + /* The masked response */ + /* Convert to a QNaN */ + st0_ptr->sigh |= 0x40000000; + st_new_ptr = &st(-1); + push(); + reg_move(&st(1), st_new_ptr); + } + } + else + { + /* A QNaN */ + st_new_ptr = &st(-1); + push(); + reg_move(&st(1), st_new_ptr); + } + break; /* return with a NaN in st(0) */ +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x0112); +#endif PARANOID + } +} + + +/*---------------------------------------------------------------------------*/ + +static void f2xm1(FPU_REG *st0_ptr) +{ + clear_C1(); + switch ( st0_ptr->tag ) + { + case TW_Valid: + { + if ( st0_ptr->exp >= 0 ) + { + /* For an 80486 FPU, the result is undefined. */ + } +#ifdef DENORM_OPERAND + else if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + else + { + /* poly_2xm1(x) requires 0 < x < 1. */ + poly_2xm1(st0_ptr, st0_ptr); + } + if ( st0_ptr->exp <= EXP_UNDER ) + { + /* A denormal result has been produced. + Precision must have been lost, this is always + an underflow. */ + arith_underflow(st0_ptr); + } + set_precision_flag_up(); /* 80486 appears to always do this */ + return; + } + case TW_Zero: + return; + case TW_Infinity: + if ( st0_ptr->sign == SIGN_NEG ) + { + /* -infinity gives -1 (p16-10) */ + reg_move(&CONST_1, st0_ptr); + st0_ptr->sign = SIGN_NEG; + } + return; + default: + single_arg_error(st0_ptr); + } +} + + +static void fptan(FPU_REG *st0_ptr) +{ + char st0_tag = st0_ptr->tag; + FPU_REG *st_new_ptr; + int q; + char arg_sign = st0_ptr->sign; + + /* Stack underflow has higher priority */ + if ( st0_tag == TW_Empty ) + { + stack_underflow(); /* Puts a QNaN in st(0) */ + if ( control_word & CW_Invalid ) + { + st_new_ptr = &st(-1); + push(); + stack_underflow(); /* Puts a QNaN in the new st(0) */ + } + return; + } + + if ( STACK_OVERFLOW ) + { stack_overflow(); return; } + + switch ( st0_tag ) + { + case TW_Valid: + if ( st0_ptr->exp > EXP_BIAS - 40 ) + { + st0_ptr->sign = SIGN_POS; + if ( (q = trig_arg(st0_ptr, 0)) != -1 ) + { + poly_tan(st0_ptr, st0_ptr); + st0_ptr->sign = (q & 1) ^ arg_sign; + } + else + { + /* Operand is out of range */ + st0_ptr->sign = arg_sign; /* restore st(0) */ + return; + } + set_precision_flag_up(); /* We do not really know if up or down */ + } + else + { + /* For a small arg, the result == the argument */ + /* Underflow may happen */ + + if ( st0_ptr->exp <= EXP_UNDER ) + { +#ifdef DENORM_OPERAND + if ( denormal_operand() ) + return; +#endif DENORM_OPERAND + /* A denormal result has been produced. + Precision must have been lost, this is always + an underflow. */ + if ( arith_underflow(st0_ptr) ) + return; + } + set_precision_flag_down(); /* Must be down. */ + } + push(); + reg_move(&CONST_1, st_new_ptr); + return; + break; + case TW_Infinity: + /* The 80486 treats infinity as an invalid operand */ + arith_invalid(st0_ptr); + if ( control_word & CW_Invalid ) + { + st_new_ptr = &st(-1); + push(); + arith_invalid(st_new_ptr); + } + return; + case TW_Zero: + push(); + reg_move(&CONST_1, st_new_ptr); + setcc(0); + break; + default: + single_arg_2_error(st0_ptr); + break; + } +} + + +static void fxtract(FPU_REG *st0_ptr) +{ + char st0_tag = st0_ptr->tag; + FPU_REG *st_new_ptr; + register FPU_REG *st1_ptr = st0_ptr; /* anticipate */ + + if ( STACK_OVERFLOW ) + { stack_overflow(); return; } + clear_C1(); + if ( !(st0_tag ^ TW_Valid) ) + { + long e; + +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + push(); + reg_move(st1_ptr, st_new_ptr); + st_new_ptr->exp = EXP_BIAS; + e = st1_ptr->exp - EXP_BIAS; + convert_l2reg(&e, st1_ptr); + return; + } + else if ( st0_tag == TW_Zero ) + { + char sign = st0_ptr->sign; + if ( divide_by_zero(SIGN_NEG, st0_ptr) ) + return; + push(); + reg_move(&CONST_Z, st_new_ptr); + st_new_ptr->sign = sign; + return; + } + else if ( st0_tag == TW_Infinity ) + { + char sign = st0_ptr->sign; + st0_ptr->sign = SIGN_POS; + push(); + reg_move(&CONST_INF, st_new_ptr); + st_new_ptr->sign = sign; + return; + } + else if ( st0_tag == TW_NaN ) + { + if ( real_2op_NaN(st0_ptr, st0_ptr, st0_ptr) ) + return; + push(); + reg_move(st1_ptr, st_new_ptr); + return; + } + else if ( st0_tag == TW_Empty ) + { + /* Is this the correct behaviour? */ + if ( control_word & EX_Invalid ) + { + stack_underflow(); + push(); + stack_underflow(); + } + else + EXCEPTION(EX_StackUnder); + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL | 0x119); +#endif PARANOID +} + + +static void fdecstp(FPU_REG *st0_ptr) +{ + clear_C1(); + top--; /* st0_ptr will be fixed in math_emulate() before the next instr */ +} + +static void fincstp(FPU_REG *st0_ptr) +{ + clear_C1(); + top++; /* st0_ptr will be fixed in math_emulate() before the next instr */ +} + + +static void fsqrt_(FPU_REG *st0_ptr) +{ + char st0_tag = st0_ptr->tag; + + clear_C1(); + if ( !(st0_tag ^ TW_Valid) ) + { + int expon; + + if (st0_ptr->sign == SIGN_NEG) + { + arith_invalid(st0_ptr); /* sqrt(negative) is invalid */ + return; + } + +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + expon = st0_ptr->exp - EXP_BIAS; + st0_ptr->exp = EXP_BIAS + (expon & 1); /* make st(0) in [1.0 .. 4.0) */ + + wm_sqrt(st0_ptr, control_word); /* Do the computation */ + + st0_ptr->exp += expon >> 1; + st0_ptr->sign = SIGN_POS; + } + else if ( st0_tag == TW_Zero ) + return; + else if ( st0_tag == TW_Infinity ) + { + if ( st0_ptr->sign == SIGN_NEG ) + arith_invalid(st0_ptr); /* sqrt(-Infinity) is invalid */ + return; + } + else + { single_arg_error(st0_ptr); return; } + +} + + +static void frndint_(FPU_REG *st0_ptr) +{ + char st0_tag = st0_ptr->tag; + int flags; + + if ( !(st0_tag ^ TW_Valid) ) + { + if (st0_ptr->exp > EXP_BIAS+63) + return; + +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + /* Fortunately, this can't overflow to 2^64 */ + if ( (flags = round_to_int(st0_ptr)) ) + set_precision_flag(flags); + + st0_ptr->exp = EXP_BIAS + 63; + normalize(st0_ptr); + return; + } + else if ( (st0_tag == TW_Zero) || (st0_tag == TW_Infinity) ) + return; + else + single_arg_error(st0_ptr); +} + + +static void fsin(FPU_REG *st0_ptr) +{ + char st0_tag = st0_ptr->tag; + char arg_sign = st0_ptr->sign; + + if ( st0_tag == TW_Valid ) + { + FPU_REG rv; + int q; + + if ( st0_ptr->exp > EXP_BIAS - 40 ) + { + st0_ptr->sign = SIGN_POS; + if ( (q = trig_arg(st0_ptr, 0)) != -1 ) + { + + poly_sine(st0_ptr, &rv); + + if (q & 2) + rv.sign ^= SIGN_POS ^ SIGN_NEG; + rv.sign ^= arg_sign; + reg_move(&rv, st0_ptr); + + /* We do not really know if up or down */ + set_precision_flag_up(); + return; + } + else + { + /* Operand is out of range */ + st0_ptr->sign = arg_sign; /* restore st(0) */ + return; + } + } + else + { + /* For a small arg, the result == the argument */ + /* Underflow may happen */ + + if ( st0_ptr->exp <= EXP_UNDER ) + { +#ifdef DENORM_OPERAND + if ( denormal_operand() ) + return; +#endif DENORM_OPERAND + /* A denormal result has been produced. + Precision must have been lost, this is always + an underflow. */ + arith_underflow(st0_ptr); + return; + } + + set_precision_flag_up(); /* Must be up. */ + } + } + else if ( st0_tag == TW_Zero ) + { + setcc(0); + return; + } + else if ( st0_tag == TW_Infinity ) + { + /* The 80486 treats infinity as an invalid operand */ + arith_invalid(st0_ptr); + return; + } + else + single_arg_error(st0_ptr); +} + + +static int f_cos(FPU_REG *arg) +{ + char arg_sign = arg->sign; + + if ( arg->tag == TW_Valid ) + { + FPU_REG rv; + int q; + + if ( arg->exp > EXP_BIAS - 40 ) + { + arg->sign = SIGN_POS; + if ( (arg->exp < EXP_BIAS) + || ((arg->exp == EXP_BIAS) + && (significand(arg) <= 0xc90fdaa22168c234LL)) ) + { + poly_cos(arg, &rv); + reg_move(&rv, arg); + + /* We do not really know if up or down */ + set_precision_flag_down(); + + return 0; + } + else if ( (q = trig_arg(arg, FCOS)) != -1 ) + { + poly_sine(arg, &rv); + + if ((q+1) & 2) + rv.sign ^= SIGN_POS ^ SIGN_NEG; + reg_move(&rv, arg); + + /* We do not really know if up or down */ + set_precision_flag_down(); + + return 0; + } + else + { + /* Operand is out of range */ + arg->sign = arg_sign; /* restore st(0) */ + return 1; + } + } + else + { +#ifdef DENORM_OPERAND + if ( (arg->exp <= EXP_UNDER) && (denormal_operand()) ) + return 1; +#endif DENORM_OPERAND + + setcc(0); + reg_move(&CONST_1, arg); +#ifdef PECULIAR_486 + set_precision_flag_down(); /* 80486 appears to do this. */ +#else + set_precision_flag_up(); /* Must be up. */ +#endif PECULIAR_486 + return 0; + } + } + else if ( arg->tag == TW_Zero ) + { + reg_move(&CONST_1, arg); + setcc(0); + return 0; + } + else if ( arg->tag == TW_Infinity ) + { + /* The 80486 treats infinity as an invalid operand */ + arith_invalid(arg); + return 1; + } + else + { + single_arg_error(arg); /* requires arg == &st(0) */ + return 1; + } +} + + +static void fcos(FPU_REG *st0_ptr) +{ + f_cos(st0_ptr); +} + + +static void fsincos(FPU_REG *st0_ptr) +{ + char st0_tag = st0_ptr->tag; + FPU_REG *st_new_ptr; + FPU_REG arg; + + /* Stack underflow has higher priority */ + if ( st0_tag == TW_Empty ) + { + stack_underflow(); /* Puts a QNaN in st(0) */ + if ( control_word & CW_Invalid ) + { + st_new_ptr = &st(-1); + push(); + stack_underflow(); /* Puts a QNaN in the new st(0) */ + } + return; + } + + if ( STACK_OVERFLOW ) + { stack_overflow(); return; } + + if ( st0_tag == TW_NaN ) + { + single_arg_2_error(st0_ptr); + return; + } + else if ( st0_tag == TW_Infinity ) + { + /* The 80486 treats infinity as an invalid operand */ + if ( !arith_invalid(st0_ptr) ) + { + /* unmasked response */ + push(); + arith_invalid(st_new_ptr); + } + return; + } + + reg_move(st0_ptr,&arg); + if ( !f_cos(&arg) ) + { + fsin(st0_ptr); + push(); + reg_move(&arg,st_new_ptr); + } + +} + + +/*---------------------------------------------------------------------------*/ +/* The following all require two arguments: st(0) and st(1) */ + +/* A lean, mean kernel for the fprem instructions. This relies upon + the division and rounding to an integer in do_fprem giving an + exact result. Because of this, rem_kernel() needs to deal only with + the least significant 64 bits, the more significant bits of the + result must be zero. + */ +static void rem_kernel(unsigned long long st0, unsigned long long *y, + unsigned long long st1, + unsigned long long q, int n) +{ + unsigned long long x; + + x = st0 << n; + + /* Do the required multiplication and subtraction in the one operation */ + asm volatile ("movl %2,%%eax; mull %4; subl %%eax,%0; sbbl %%edx,%1; + movl %3,%%eax; mull %4; subl %%eax,%1; + movl %2,%%eax; mull %5; subl %%eax,%1;" + :"=m" (x), "=m" (((unsigned *)&x)[1]) + :"m" (st1),"m" (((unsigned *)&st1)[1]), + "m" (q),"m" (((unsigned *)&q)[1]) + :"%ax","%dx"); + + *y = x; +} + + +/* Remainder of st(0) / st(1) */ +/* This routine produces exact results, i.e. there is never any + rounding or truncation, etc of the result. */ +static void do_fprem(FPU_REG *st0_ptr, int round) +{ + FPU_REG *st1_ptr = &st(1); + char st1_tag = st1_ptr->tag; + char st0_tag = st0_ptr->tag; + char sign = st0_ptr->sign; + + if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) ) + { + FPU_REG tmp; + int old_cw = control_word; + int expdif = st0_ptr->exp - st1_ptr->exp; + long long q; + unsigned short saved_status; + int cc = 0; + +#ifdef DENORM_OPERAND + if ( ((st0_ptr->exp <= EXP_UNDER) || + (st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + /* We want the status following the denorm tests, but don't want + the status changed by the arithmetic operations. */ + saved_status = partial_status; + control_word &= ~CW_RC; + control_word |= RC_CHOP; + + if (expdif < 64) + { + /* This should be the most common case */ + + if ( expdif > -2 ) + { + reg_div(st0_ptr, st1_ptr, &tmp, PR_64_BITS | RC_CHOP | 0x3f); + + if ( tmp.exp >= EXP_BIAS ) + { + round_to_int(&tmp); /* Fortunately, this can't overflow + to 2^64 */ + q = significand(&tmp); + + rem_kernel(significand(st0_ptr), + &significand(&tmp), + significand(st1_ptr), + q, expdif); + + tmp.exp = st1_ptr->exp; + } + else + { + reg_move(st0_ptr, &tmp); + q = 0; + } + tmp.sign = sign; + + if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) ) + { + /* We may need to subtract st(1) once more, + to get a result <= 1/2 of st(1). */ + unsigned long long x; + expdif = st1_ptr->exp - tmp.exp; + if ( expdif <= 1 ) + { + if ( expdif == 0 ) + x = significand(st1_ptr) - significand(&tmp); + else /* expdif is 1 */ + x = (significand(st1_ptr) << 1) - significand(&tmp); + if ( (x < significand(&tmp)) || + /* or equi-distant (from 0 & st(1)) and q is odd */ + ((x == significand(&tmp)) && (q & 1) ) ) + { + tmp.sign ^= (SIGN_POS^SIGN_NEG); + significand(&tmp) = x; + q++; + } + } + } + + if (q & 4) cc |= SW_C0; + if (q & 2) cc |= SW_C3; + if (q & 1) cc |= SW_C1; + } + else + { + control_word = old_cw; + setcc(0); + return; + } + } + else + { + /* There is a large exponent difference ( >= 64 ) */ + /* To make much sense, the code in this section should + be done at high precision. */ + int exp_1; + + /* prevent overflow here */ + /* N is 'a number between 32 and 63' (p26-113) */ + reg_move(st0_ptr, &tmp); + tmp.exp = EXP_BIAS + 56; + exp_1 = st1_ptr->exp; st1_ptr->exp = EXP_BIAS; + expdif -= 56; + + reg_div(&tmp, st1_ptr, &tmp, PR_64_BITS | RC_CHOP | 0x3f); + st1_ptr->exp = exp_1; + + round_to_int(&tmp); /* Fortunately, this can't overflow to 2^64 */ + + rem_kernel(significand(st0_ptr), + &significand(&tmp), + significand(st1_ptr), + significand(&tmp), + tmp.exp - EXP_BIAS + ); + tmp.exp = exp_1 + expdif; + tmp.sign = sign; + + /* It is possible for the operation to be complete here. + What does the IEEE standard say? The Intel 80486 manual + implies that the operation will never be completed at this + point, and the behaviour of a real 80486 confirms this. + */ + if ( !(tmp.sigh | tmp.sigl) ) + { + /* The result is zero */ + control_word = old_cw; + partial_status = saved_status; + reg_move(&CONST_Z, st0_ptr); + st0_ptr->sign = sign; +#ifdef PECULIAR_486 + setcc(SW_C2); +#else + setcc(0); +#endif PECULIAR_486 + return; + } + cc = SW_C2; + } + + control_word = old_cw; + partial_status = saved_status; + normalize_nuo(&tmp); + reg_move(&tmp, st0_ptr); + setcc(cc); + + /* The only condition to be looked for is underflow, + and it can occur here only if underflow is unmasked. */ + if ( (st0_ptr->exp <= EXP_UNDER) && (st0_ptr->tag != TW_Zero) + && !(control_word & CW_Underflow) ) + arith_underflow(st0_ptr); + + return; + } + else if ( (st0_tag == TW_Empty) | (st1_tag == TW_Empty) ) + { + stack_underflow(); + return; + } + else if ( st0_tag == TW_Zero ) + { + if ( st1_tag == TW_Valid ) + { +#ifdef DENORM_OPERAND + if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + setcc(0); return; + } + else if ( st1_tag == TW_Zero ) + { arith_invalid(st0_ptr); return; } /* fprem(?,0) always invalid */ + else if ( st1_tag == TW_Infinity ) + { setcc(0); return; } + } + else if ( st0_tag == TW_Valid ) + { + if ( st1_tag == TW_Zero ) + { + arith_invalid(st0_ptr); /* fprem(Valid,Zero) is invalid */ + return; + } + else if ( st1_tag != TW_NaN ) + { +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + if ( st1_tag == TW_Infinity ) + { + /* fprem(Valid,Infinity) is o.k. */ + setcc(0); return; + } + } + } + else if ( st0_tag == TW_Infinity ) + { + if ( st1_tag != TW_NaN ) + { + arith_invalid(st0_ptr); /* fprem(Infinity,?) is invalid */ + return; + } + } + + /* One of the registers must contain a NaN is we got here. */ + +#ifdef PARANOID + if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) ) + EXCEPTION(EX_INTERNAL | 0x118); +#endif PARANOID + + real_2op_NaN(st1_ptr, st0_ptr, st0_ptr); + +} + + +/* ST(1) <- ST(1) * log ST; pop ST */ +static void fyl2x(FPU_REG *st0_ptr) +{ + char st0_tag = st0_ptr->tag; + FPU_REG *st1_ptr = &st(1), exponent; + char st1_tag = st1_ptr->tag; + int e; + + clear_C1(); + if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) ) + { + if ( st0_ptr->sign == SIGN_POS ) + { +#ifdef DENORM_OPERAND + if ( ((st0_ptr->exp <= EXP_UNDER) || + (st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) ) + { + /* Special case. The result can be precise. */ + e = st0_ptr->exp - EXP_BIAS; + if ( e > 0 ) + { + exponent.sigh = e; + exponent.sign = SIGN_POS; + } + else + { + exponent.sigh = -e; + exponent.sign = SIGN_NEG; + } + exponent.sigl = 0; + exponent.exp = EXP_BIAS + 31; + exponent.tag = TW_Valid; + normalize_nuo(&exponent); + reg_mul(&exponent, st1_ptr, st1_ptr, FULL_PRECISION); + } + else + { + /* The usual case */ + poly_l2(st0_ptr, st1_ptr, st1_ptr); + if ( st1_ptr->exp <= EXP_UNDER ) + { + /* A denormal result has been produced. + Precision must have been lost, this is always + an underflow. */ + arith_underflow(st1_ptr); + } + else + set_precision_flag_up(); /* 80486 appears to always do this */ + } + pop(); + return; + } + else + { + /* negative */ + if ( !arith_invalid(st1_ptr) ) + pop(); + return; + } + } + else if ( (st0_tag == TW_Empty) || (st1_tag == TW_Empty) ) + { + stack_underflow_pop(1); + return; + } + else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) + { + if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) + pop(); + return; + } + else if ( (st0_tag <= TW_Zero) && (st1_tag <= TW_Zero) ) + { + /* one of the args is zero, the other valid, or both zero */ + if ( st0_tag == TW_Zero ) + { + if ( st1_tag == TW_Zero ) + { + /* Both args zero is invalid */ + if ( !arith_invalid(st1_ptr) ) + pop(); + } +#ifdef PECULIAR_486 + /* This case is not specifically covered in the manual, + but divide-by-zero would seem to be the best response. + However, a real 80486 does it this way... */ + else if ( st0_ptr->tag == TW_Infinity ) + { + reg_move(&CONST_INF, st1_ptr); + pop(); + } +#endif PECULIAR_486 + else + { + if ( !divide_by_zero(st1_ptr->sign^SIGN_NEG^SIGN_POS, st1_ptr) ) + pop(); + } + return; + } + else + { + /* st(1) contains zero, st(0) valid <> 0 */ + /* Zero is the valid answer */ + char sign = st1_ptr->sign; + + if ( st0_ptr->sign == SIGN_NEG ) + { + /* log(negative) */ + if ( !arith_invalid(st1_ptr) ) + pop(); + return; + } + +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + if ( st0_ptr->exp < EXP_BIAS ) sign ^= SIGN_NEG^SIGN_POS; + pop(); st0_ptr = &st(0); + reg_move(&CONST_Z, st0_ptr); + st0_ptr->sign = sign; + return; + } + } + /* One or both arg must be an infinity */ + else if ( st0_tag == TW_Infinity ) + { + if ( (st0_ptr->sign == SIGN_NEG) || (st1_tag == TW_Zero) ) + { + /* log(-infinity) or 0*log(infinity) */ + if ( !arith_invalid(st1_ptr) ) + pop(); + return; + } + else + { + char sign = st1_ptr->sign; + +#ifdef DENORM_OPERAND + if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + pop(); st0_ptr = &st(0); + reg_move(&CONST_INF, st0_ptr); + st0_ptr->sign = sign; + return; + } + } + /* st(1) must be infinity here */ + else if ( (st0_tag == TW_Valid) && (st0_ptr->sign == SIGN_POS) ) + { + if ( st0_ptr->exp >= EXP_BIAS ) + { + if ( (st0_ptr->exp == EXP_BIAS) && + (st0_ptr->sigh == 0x80000000) && + (st0_ptr->sigl == 0) ) + { + /* st(0) holds 1.0 */ + /* infinity*log(1) */ + if ( !arith_invalid(st1_ptr) ) + pop(); + return; + } + /* st(0) is positive and > 1.0 */ + pop(); + } + else + { + /* st(0) is positive and < 1.0 */ + +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + st1_ptr->sign ^= SIGN_NEG; + pop(); + } + return; + } + else + { + /* st(0) must be zero or negative */ + if ( st0_ptr->tag == TW_Zero ) + { + /* This should be invalid, but a real 80486 is happy with it. */ +#ifndef PECULIAR_486 + if ( !divide_by_zero(st1_ptr->sign, st1_ptr) ) +#endif PECULIAR_486 + { + st1_ptr->sign ^= SIGN_NEG^SIGN_POS; + pop(); + } + } + else + { + /* log(negative) */ + if ( !arith_invalid(st1_ptr) ) + pop(); + } + return; + } +} + + +static void fpatan(FPU_REG *st0_ptr) +{ + char st0_tag = st0_ptr->tag; + FPU_REG *st1_ptr = &st(1); + char st1_tag = st1_ptr->tag; + + clear_C1(); + if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) ) + { +#ifdef DENORM_OPERAND + if ( ((st0_ptr->exp <= EXP_UNDER) || + (st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + poly_atan(st0_ptr, st1_ptr, st1_ptr); + + if ( st1_ptr->exp <= EXP_UNDER ) + { + /* A denormal result has been produced. + Precision must have been lost. + This is by definition an underflow. */ + arith_underflow(st1_ptr); + pop(); + return; + } + } + else if ( (st0_tag == TW_Empty) || (st1_tag == TW_Empty) ) + { + stack_underflow_pop(1); + return; + } + else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) + { + if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) + pop(); + return; + } + else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) ) + { + char sign = st1_ptr->sign; + if ( st0_tag == TW_Infinity ) + { + if ( st1_tag == TW_Infinity ) + { + if ( st0_ptr->sign == SIGN_POS ) + { reg_move(&CONST_PI4, st1_ptr); } + else + reg_add(&CONST_PI4, &CONST_PI2, st1_ptr, FULL_PRECISION); + } + else + { +#ifdef DENORM_OPERAND + if ( st1_tag != TW_Zero ) + { + if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; + } +#endif DENORM_OPERAND + + if ( st0_ptr->sign == SIGN_POS ) + { + reg_move(&CONST_Z, st1_ptr); + st1_ptr->sign = sign; /* An 80486 preserves the sign */ + pop(); + return; + } + else + reg_move(&CONST_PI, st1_ptr); + } + } + else + { + /* st(1) is infinity, st(0) not infinity */ +#ifdef DENORM_OPERAND + if ( st0_tag != TW_Zero ) + { + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; + } +#endif DENORM_OPERAND + + reg_move(&CONST_PI2, st1_ptr); + } + st1_ptr->sign = sign; + } + else if ( st1_tag == TW_Zero ) + { + /* st(0) must be valid or zero */ + char sign = st1_ptr->sign; + +#ifdef DENORM_OPERAND + if ( st0_tag != TW_Zero ) + { + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; + } +#endif DENORM_OPERAND + + if ( st0_ptr->sign == SIGN_POS ) + { /* An 80486 preserves the sign */ pop(); return; } + else + reg_move(&CONST_PI, st1_ptr); + st1_ptr->sign = sign; + } + else if ( st0_tag == TW_Zero ) + { + /* st(1) must be TW_Valid here */ + char sign = st1_ptr->sign; + +#ifdef DENORM_OPERAND + if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + reg_move(&CONST_PI2, st1_ptr); + st1_ptr->sign = sign; + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL | 0x125); +#endif PARANOID + + pop(); + set_precision_flag_up(); /* We do not really know if up or down */ +} + + +static void fprem(FPU_REG *st0_ptr) +{ + do_fprem(st0_ptr, RC_CHOP); +} + + +static void fprem1(FPU_REG *st0_ptr) +{ + do_fprem(st0_ptr, RC_RND); +} + + +static void fyl2xp1(FPU_REG *st0_ptr) +{ + char st0_tag = st0_ptr->tag, sign; + FPU_REG *st1_ptr = &st(1); + char st1_tag = st1_ptr->tag; + + clear_C1(); + if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) ) + { +#ifdef DENORM_OPERAND + if ( ((st0_ptr->exp <= EXP_UNDER) || + (st1_ptr->exp <= EXP_UNDER)) && denormal_operand() ) + return; +#endif DENORM_OPERAND + + if ( poly_l2p1(st0_ptr, st1_ptr, st1_ptr) ) + { +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + st1_ptr->sign ^= SIGN_POS^SIGN_NEG; +#else + if ( arith_invalid(st1_ptr) ) /* poly_l2p1() returned invalid */ + return; +#endif PECULIAR_486 + } + if ( st1_ptr->exp <= EXP_UNDER ) + { + /* A denormal result has been produced. + Precision must have been lost, this is always + an underflow. */ + sign = st1_ptr->sign; + arith_underflow(st1_ptr); + st1_ptr->sign = sign; + } + else + set_precision_flag_up(); /* 80486 appears to always do this */ + pop(); + return; + } + else if ( (st0_tag == TW_Empty) | (st1_tag == TW_Empty) ) + { + stack_underflow_pop(1); + return; + } + else if ( st0_tag == TW_Zero ) + { + if ( st1_tag <= TW_Zero ) + { +#ifdef DENORM_OPERAND + if ( (st1_tag == TW_Valid) && (st1_ptr->exp <= EXP_UNDER) && + (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + st0_ptr->sign ^= st1_ptr->sign; + reg_move(st0_ptr, st1_ptr); + } + else if ( st1_tag == TW_Infinity ) + { + /* Infinity*log(1) */ + if ( !arith_invalid(st1_ptr) ) + pop(); + return; + } + else if ( st1_tag == TW_NaN ) + { + if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) + pop(); + return; + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL | 0x116); + return; + } +#endif PARANOID + pop(); return; + } + else if ( st0_tag == TW_Valid ) + { + if ( st1_tag == TW_Zero ) + { + if ( st0_ptr->sign == SIGN_NEG ) + { + if ( st0_ptr->exp >= EXP_BIAS ) + { + /* st(0) holds <= -1.0 */ +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + st1_ptr->sign ^= SIGN_POS^SIGN_NEG; +#else + if ( arith_invalid(st1_ptr) ) return; +#endif PECULIAR_486 + pop(); return; + } +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + st1_ptr->sign ^= SIGN_POS^SIGN_NEG; + pop(); return; + } +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + pop(); return; + } + if ( st1_tag == TW_Infinity ) + { + if ( st0_ptr->sign == SIGN_NEG ) + { + if ( (st0_ptr->exp >= EXP_BIAS) && + !((st0_ptr->sigh == 0x80000000) && + (st0_ptr->sigl == 0)) ) + { + /* st(0) holds < -1.0 */ +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + st1_ptr->sign ^= SIGN_POS^SIGN_NEG; +#else + if ( arith_invalid(st1_ptr) ) return; +#endif PECULIAR_486 + pop(); return; + } +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + st1_ptr->sign ^= SIGN_POS^SIGN_NEG; + pop(); return; + } +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + pop(); return; + } + if ( st1_tag == TW_NaN ) + { + if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) + pop(); + return; + } + } + else if ( st0_tag == TW_NaN ) + { + if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) + pop(); + return; + } + else if ( st0_tag == TW_Infinity ) + { + if ( st1_tag == TW_NaN ) + { + if ( !real_2op_NaN(st0_ptr, st1_ptr, st1_ptr) ) + pop(); + return; + } + else if ( st0_ptr->sign == SIGN_NEG ) + { + int exponent = st1_ptr->exp; +#ifndef PECULIAR_486 + /* This should have higher priority than denormals, but... */ + if ( arith_invalid(st1_ptr) ) /* log(-infinity) */ + return; +#endif PECULIAR_486 +#ifdef DENORM_OPERAND + if ( st1_tag != TW_Zero ) + { + if ( (exponent <= EXP_UNDER) && (denormal_operand()) ) + return; + } +#endif DENORM_OPERAND +#ifdef PECULIAR_486 + /* Denormal operands actually get higher priority */ + if ( arith_invalid(st1_ptr) ) /* log(-infinity) */ + return; +#endif PECULIAR_486 + pop(); + return; + } + else if ( st1_tag == TW_Zero ) + { + /* log(infinity) */ + if ( !arith_invalid(st1_ptr) ) + pop(); + return; + } + + /* st(1) must be valid here. */ + +#ifdef DENORM_OPERAND + if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + /* The Manual says that log(Infinity) is invalid, but a real + 80486 sensibly says that it is o.k. */ + { char sign = st1_ptr->sign; + reg_move(&CONST_INF, st1_ptr); + st1_ptr->sign = sign; + } + pop(); + return; + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL | 0x117); + } +#endif PARANOID +} + + +static void fscale(FPU_REG *st0_ptr) +{ + char st0_tag = st0_ptr->tag; + FPU_REG *st1_ptr = &st(1); + char st1_tag = st1_ptr->tag; + int old_cw = control_word; + char sign = st0_ptr->sign; + + clear_C1(); + if ( !((st0_tag ^ TW_Valid) | (st1_tag ^ TW_Valid)) ) + { + long scale; + FPU_REG tmp; + +#ifdef DENORM_OPERAND + if ( ((st0_ptr->exp <= EXP_UNDER) || + (st1_ptr->exp <= EXP_UNDER)) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + if ( st1_ptr->exp > EXP_BIAS + 30 ) + { + /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */ + char sign; + + if ( st1_ptr->sign == SIGN_POS ) + { + EXCEPTION(EX_Overflow); + sign = st0_ptr->sign; + reg_move(&CONST_INF, st0_ptr); + st0_ptr->sign = sign; + } + else + { + EXCEPTION(EX_Underflow); + sign = st0_ptr->sign; + reg_move(&CONST_Z, st0_ptr); + st0_ptr->sign = sign; + } + return; + } + + control_word &= ~CW_RC; + control_word |= RC_CHOP; + reg_move(st1_ptr, &tmp); + round_to_int(&tmp); /* This can never overflow here */ + control_word = old_cw; + scale = st1_ptr->sign ? -tmp.sigl : tmp.sigl; + scale += st0_ptr->exp; + st0_ptr->exp = scale; + + /* Use round_reg() to properly detect under/overflow etc */ + round_reg(st0_ptr, 0, control_word); + + return; + } + else if ( st0_tag == TW_Valid ) + { + if ( st1_tag == TW_Zero ) + { + +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + return; + } + if ( st1_tag == TW_Infinity ) + { +#ifdef DENORM_OPERAND + if ( (st0_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + if ( st1_ptr->sign == SIGN_POS ) + { reg_move(&CONST_INF, st0_ptr); } + else + reg_move(&CONST_Z, st0_ptr); + st0_ptr->sign = sign; + return; + } + if ( st1_tag == TW_NaN ) + { real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; } + } + else if ( st0_tag == TW_Zero ) + { + if ( st1_tag == TW_Valid ) + { + +#ifdef DENORM_OPERAND + if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + return; + } + else if ( st1_tag == TW_Zero ) { return; } + else if ( st1_tag == TW_Infinity ) + { + if ( st1_ptr->sign == SIGN_NEG ) + return; + else + { + arith_invalid(st0_ptr); /* Zero scaled by +Infinity */ + return; + } + } + else if ( st1_tag == TW_NaN ) + { real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; } + } + else if ( st0_tag == TW_Infinity ) + { + if ( st1_tag == TW_Valid ) + { + +#ifdef DENORM_OPERAND + if ( (st1_ptr->exp <= EXP_UNDER) && (denormal_operand()) ) + return; +#endif DENORM_OPERAND + + return; + } + if ( ((st1_tag == TW_Infinity) && (st1_ptr->sign == SIGN_POS)) + || (st1_tag == TW_Zero) ) + return; + else if ( st1_tag == TW_Infinity ) + { + arith_invalid(st0_ptr); /* Infinity scaled by -Infinity */ + return; + } + else if ( st1_tag == TW_NaN ) + { real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; } + } + else if ( st0_tag == TW_NaN ) + { + if ( st1_tag != TW_Empty ) + { real_2op_NaN(st0_ptr, st1_ptr, st0_ptr); return; } + } + +#ifdef PARANOID + if ( !((st0_tag == TW_Empty) || (st1_tag == TW_Empty)) ) + { + EXCEPTION(EX_INTERNAL | 0x115); + return; + } +#endif + + /* At least one of st(0), st(1) must be empty */ + stack_underflow(); + +} + + +/*---------------------------------------------------------------------------*/ + +static FUNC_ST0 const trig_table_a[] = { + f2xm1, fyl2x, fptan, fpatan, fxtract, fprem1, fdecstp, fincstp +}; + +void trig_a(void) +{ + (trig_table_a[FPU_rm])(&st(0)); +} + + +static FUNC_ST0 const trig_table_b[] = + { + fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, fsin, fcos + }; + +void trig_b(void) +{ + (trig_table_b[FPU_rm])(&st(0)); +} diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c new file mode 100644 index 000000000..6f3270ae3 --- /dev/null +++ b/arch/i386/math-emu/get_address.c @@ -0,0 +1,423 @@ +/*---------------------------------------------------------------------------+ + | get_address.c | + | | + | Get the effective address from an FPU instruction. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + + +#include <linux/stddef.h> +#include <linux/head.h> + +#include <asm/segment.h> + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" + + +#define FPU_WRITE_BIT 0x10 + +static int reg_offset[] = { + offsetof(struct info,___eax), + offsetof(struct info,___ecx), + offsetof(struct info,___edx), + offsetof(struct info,___ebx), + offsetof(struct info,___esp), + offsetof(struct info,___ebp), + offsetof(struct info,___esi), + offsetof(struct info,___edi) +}; + +#define REG_(x) (*(long *)(reg_offset[(x)]+(char *) FPU_info)) + +static int reg_offset_vm86[] = { + offsetof(struct info,___cs), + offsetof(struct info,___vm86_ds), + offsetof(struct info,___vm86_es), + offsetof(struct info,___vm86_fs), + offsetof(struct info,___vm86_gs), + offsetof(struct info,___ss), + offsetof(struct info,___vm86_ds) + }; + +#define VM86_REG_(x) (*(unsigned short *) \ + (reg_offset_vm86[((unsigned)x)]+(char *) FPU_info)) + +static int reg_offset_pm[] = { + offsetof(struct info,___cs), + offsetof(struct info,___ds), + offsetof(struct info,___es), + offsetof(struct info,___fs), + offsetof(struct info,___gs), + offsetof(struct info,___ss), + offsetof(struct info,___ds) + }; + +#define PM_REG_(x) (*(unsigned short *) \ + (reg_offset_pm[((unsigned)x)]+(char *) FPU_info)) + + +/* Decode the SIB byte. This function assumes mod != 0 */ +static int sib(int mod, unsigned long *fpu_eip) +{ + unsigned char ss,index,base; + long offset; + + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(1); + base = get_fs_byte((char *) (*fpu_eip)); /* The SIB byte */ + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + ss = base >> 6; + index = (base >> 3) & 7; + base &= 7; + + if ((mod == 0) && (base == 5)) + offset = 0; /* No base register */ + else + offset = REG_(base); + + if (index == 4) + { + /* No index register */ + /* A non-zero ss is illegal */ + if ( ss ) + EXCEPTION(EX_Invalid); + } + else + { + offset += (REG_(index)) << ss; + } + + if (mod == 1) + { + /* 8 bit signed displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(1); + offset += (signed char) get_fs_byte((char *) (*fpu_eip)); + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + } + else if (mod == 2 || base == 5) /* The second condition also has mod==0 */ + { + /* 32 bit displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(4); + offset += (signed) get_fs_long((unsigned long *) (*fpu_eip)); + RE_ENTRANT_CHECK_ON; + (*fpu_eip) += 4; + } + + return offset; +} + + +static unsigned long vm86_segment(unsigned char segment, + unsigned short *selector) +{ + segment--; +#ifdef PARANOID + if ( segment > PREFIX_SS_ ) + { + EXCEPTION(EX_INTERNAL|0x130); + math_abort(FPU_info,SIGSEGV); + } +#endif PARANOID + *selector = VM86_REG_(segment); + return (unsigned long)VM86_REG_(segment) << 4; +} + + +/* This should work for 16 and 32 bit protected mode. */ +static long pm_address(unsigned char FPU_modrm, unsigned char segment, + unsigned short *selector, long offset) +{ + struct desc_struct descriptor; + unsigned long base_address, limit, address, seg_top; + + segment--; +#ifdef PARANOID + if ( segment > PREFIX_SS_ ) + { + EXCEPTION(EX_INTERNAL|0x132); + math_abort(FPU_info,SIGSEGV); + } +#endif PARANOID + + *selector = PM_REG_(segment); + + descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); + base_address = SEG_BASE_ADDR(descriptor); + address = base_address + offset; + limit = base_address + + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1; + if ( limit < base_address ) limit = 0xffffffff; + + if ( SEG_EXPAND_DOWN(descriptor) ) + { + if ( SEG_G_BIT(descriptor) ) + seg_top = 0xffffffff; + else + { + seg_top = base_address + (1 << 20); + if ( seg_top < base_address ) seg_top = 0xffffffff; + } + access_limit = + (address <= limit) || (address >= seg_top) ? 0 : + ((seg_top-address) >= 255 ? 255 : seg_top-address); + } + else + { + access_limit = + (address > limit) || (address < base_address) ? 0 : + ((limit-address) >= 254 ? 255 : limit-address+1); + } + if ( SEG_EXECUTE_ONLY(descriptor) || + (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) ) + { + access_limit = 0; + } + return address; +} + + +/* + MOD R/M byte: MOD == 3 has a special use for the FPU + SIB byte used iff R/M = 100b + + 7 6 5 4 3 2 1 0 + ..... ......... ......... + MOD OPCODE(2) R/M + + + SIB byte + + 7 6 5 4 3 2 1 0 + ..... ......... ......... + SS INDEX BASE + +*/ + +void *get_address(unsigned char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, +/* unsigned short *selector, unsigned long *offset, */ + fpu_addr_modes addr_modes) +{ + unsigned char mod; + unsigned rm = FPU_modrm & 7; + long *cpu_reg_ptr; + int address = 0; /* Initialized just to stop compiler warnings. */ + + /* Memory accessed via the cs selector is write protected + in `non-segmented' 32 bit protected mode. */ + if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) + && (addr_modes.override.segment == PREFIX_CS_) ) + { + math_abort(FPU_info,SIGSEGV); + } + + addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ + + mod = (FPU_modrm >> 6) & 3; + + if (rm == 4 && mod != 3) + { + address = sib(mod, fpu_eip); + } + else + { + cpu_reg_ptr = & REG_(rm); + switch (mod) + { + case 0: + if (rm == 5) + { + /* Special case: disp32 */ + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(4); + address = get_fs_long((unsigned long *) (*fpu_eip)); + (*fpu_eip) += 4; + RE_ENTRANT_CHECK_ON; + addr->offset = address; + return (void *) address; + } + else + { + address = *cpu_reg_ptr; /* Just return the contents + of the cpu register */ + addr->offset = address; + return (void *) address; + } + case 1: + /* 8 bit signed displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(1); + address = (signed char) get_fs_byte((char *) (*fpu_eip)); + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + break; + case 2: + /* 32 bit displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(4); + address = (signed) get_fs_long((unsigned long *) (*fpu_eip)); + (*fpu_eip) += 4; + RE_ENTRANT_CHECK_ON; + break; + case 3: + /* Not legal for the FPU */ + EXCEPTION(EX_Invalid); + } + address += *cpu_reg_ptr; + } + + addr->offset = address; + + switch ( addr_modes.default_mode ) + { + case 0: + break; + case VM86: + address += vm86_segment(addr_modes.override.segment, + (unsigned short *)&(addr->selector)); + break; + case PM16: + case SEG32: + address = pm_address(FPU_modrm, addr_modes.override.segment, + (unsigned short *)&(addr->selector), address); + break; + default: + EXCEPTION(EX_INTERNAL|0x133); + } + + return (void *)address; +} + + +void *get_address_16(unsigned char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, +/* unsigned short *selector, unsigned long *offset, */ + fpu_addr_modes addr_modes) +{ + unsigned char mod; + unsigned rm = FPU_modrm & 7; + int address = 0; /* Default used for mod == 0 */ + + /* Memory accessed via the cs selector is write protected + in `non-segmented' 32 bit protected mode. */ + if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) + && (addr_modes.override.segment == PREFIX_CS_) ) + { + math_abort(FPU_info,SIGSEGV); + } + + addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ + + mod = (FPU_modrm >> 6) & 3; + + switch (mod) + { + case 0: + if (rm == 6) + { + /* Special case: disp16 */ + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(2); + address = (unsigned short)get_fs_word((unsigned short *) (*fpu_eip)); + (*fpu_eip) += 2; + RE_ENTRANT_CHECK_ON; + goto add_segment; + } + break; + case 1: + /* 8 bit signed displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(1); + address = (signed char) get_fs_byte((signed char *) (*fpu_eip)); + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + break; + case 2: + /* 16 bit displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_verify_area(2); + address = (unsigned) get_fs_word((unsigned short *) (*fpu_eip)); + (*fpu_eip) += 2; + RE_ENTRANT_CHECK_ON; + break; + case 3: + /* Not legal for the FPU */ + EXCEPTION(EX_Invalid); + break; + } + switch ( rm ) + { + case 0: + address += FPU_info->___ebx + FPU_info->___esi; + break; + case 1: + address += FPU_info->___ebx + FPU_info->___edi; + break; + case 2: + address += FPU_info->___ebp + FPU_info->___esi; + if ( addr_modes.override.segment == PREFIX_DEFAULT ) + addr_modes.override.segment = PREFIX_SS_; + break; + case 3: + address += FPU_info->___ebp + FPU_info->___edi; + if ( addr_modes.override.segment == PREFIX_DEFAULT ) + addr_modes.override.segment = PREFIX_SS_; + break; + case 4: + address += FPU_info->___esi; + break; + case 5: + address += FPU_info->___edi; + break; + case 6: + address += FPU_info->___ebp; + if ( addr_modes.override.segment == PREFIX_DEFAULT ) + addr_modes.override.segment = PREFIX_SS_; + break; + case 7: + address += FPU_info->___ebx; + break; + } + + add_segment: + address &= 0xffff; + + addr->offset = address; + + switch ( addr_modes.default_mode ) + { + case 0: + break; + case VM86: + address += vm86_segment(addr_modes.override.segment, + (unsigned short *)&(addr->selector)); + break; + case PM16: + case SEG32: + address = pm_address(FPU_modrm, addr_modes.override.segment, + (unsigned short *)&(addr->selector), address); + break; + default: + EXCEPTION(EX_INTERNAL|0x131); + } + + return (void *)address ; +} diff --git a/arch/i386/math-emu/load_store.c b/arch/i386/math-emu/load_store.c new file mode 100644 index 000000000..6f0e167d6 --- /dev/null +++ b/arch/i386/math-emu/load_store.c @@ -0,0 +1,260 @@ +/*---------------------------------------------------------------------------+ + | load_store.c | + | | + | This file contains most of the code to interpret the FPU instructions | + | which load and store from user memory. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include <asm/segment.h> + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" + + +#define _NONE_ 0 /* st0_ptr etc not needed */ +#define _REG0_ 1 /* Will be storing st(0) */ +#define _PUSH_ 3 /* Need to check for space to push onto stack */ +#define _null_ 4 /* Function illegal or not implemented */ + +#define pop_0() { st0_ptr->tag = TW_Empty; top++; } + + +static unsigned char const type_table[32] = { + _PUSH_, _PUSH_, _PUSH_, _PUSH_, + _null_, _null_, _null_, _null_, + _REG0_, _REG0_, _REG0_, _REG0_, + _REG0_, _REG0_, _REG0_, _REG0_, + _NONE_, _null_, _NONE_, _PUSH_, + _NONE_, _PUSH_, _null_, _PUSH_, + _NONE_, _null_, _NONE_, _REG0_, + _NONE_, _REG0_, _NONE_, _REG0_ + }; + +unsigned char const data_sizes_16[32] = { + 4, 4, 8, 2, 0, 0, 0, 0, + 4, 4, 8, 2, 4, 4, 8, 2, + 14, 0, 94, 10, 2, 10, 0, 8, + 14, 0, 94, 10, 2, 10, 2, 8 +}; + +unsigned char const data_sizes_32[32] = { + 4, 4, 8, 2, 0, 0, 0, 0, + 4, 4, 8, 2, 4, 4, 8, 2, + 28, 0,108, 10, 2, 10, 0, 8, + 28, 0,108, 10, 2, 10, 2, 8 +}; + +int load_store_instr(unsigned char type, fpu_addr_modes addr_modes, + void *data_address) +{ + FPU_REG loaded_data; + FPU_REG *st0_ptr; + + st0_ptr = NULL; /* Initialized just to stop compiler warnings. */ + + if ( addr_modes.default_mode & PROTECTED ) + { + if ( addr_modes.default_mode == SEG32 ) + { + if ( access_limit < data_sizes_32[type] ) + math_abort(FPU_info,SIGSEGV); + } + else if ( addr_modes.default_mode == PM16 ) + { + if ( access_limit < data_sizes_16[type] ) + math_abort(FPU_info,SIGSEGV); + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL|0x140); +#endif PARANOID + } + + switch ( type_table[type] ) + { + case _NONE_: + break; + case _REG0_: + st0_ptr = &st(0); /* Some of these instructions pop after + storing */ + break; + case _PUSH_: + { + st0_ptr = &st(-1); + if ( st0_ptr->tag != TW_Empty ) + { stack_overflow(); return 0; } + top--; + } + break; + case _null_: + FPU_illegal(); + return 0; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x141); + return 0; +#endif PARANOID + } + + switch ( type ) + { + case 000: /* fld m32real */ + clear_C1(); + reg_load_single((float *)data_address, &loaded_data); + if ( (loaded_data.tag == TW_NaN) && + real_2op_NaN(&loaded_data, &loaded_data, &loaded_data) ) + { + top++; + break; + } + reg_move(&loaded_data, st0_ptr); + break; + case 001: /* fild m32int */ + clear_C1(); + reg_load_int32((long *)data_address, st0_ptr); + break; + case 002: /* fld m64real */ + clear_C1(); + reg_load_double((double *)data_address, &loaded_data); + if ( (loaded_data.tag == TW_NaN) && + real_2op_NaN(&loaded_data, &loaded_data, &loaded_data) ) + { + top++; + break; + } + reg_move(&loaded_data, st0_ptr); + break; + case 003: /* fild m16int */ + clear_C1(); + reg_load_int16((short *)data_address, st0_ptr); + break; + case 010: /* fst m32real */ + clear_C1(); + reg_store_single((float *)data_address, st0_ptr); + break; + case 011: /* fist m32int */ + clear_C1(); + reg_store_int32((long *)data_address, st0_ptr); + break; + case 012: /* fst m64real */ + clear_C1(); + reg_store_double((double *)data_address, st0_ptr); + break; + case 013: /* fist m16int */ + clear_C1(); + reg_store_int16((short *)data_address, st0_ptr); + break; + case 014: /* fstp m32real */ + clear_C1(); + if ( reg_store_single((float *)data_address, st0_ptr) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 015: /* fistp m32int */ + clear_C1(); + if ( reg_store_int32((long *)data_address, st0_ptr) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 016: /* fstp m64real */ + clear_C1(); + if ( reg_store_double((double *)data_address, st0_ptr) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 017: /* fistp m16int */ + clear_C1(); + if ( reg_store_int16((short *)data_address, st0_ptr) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 020: /* fldenv m14/28byte */ + fldenv(addr_modes, (char *)data_address); + /* Ensure that the values just loaded are not changed by + fix-up operations. */ + return 1; + case 022: /* frstor m94/108byte */ + frstor(addr_modes, (char *)data_address); + /* Ensure that the values just loaded are not changed by + fix-up operations. */ + return 1; + case 023: /* fbld m80dec */ + clear_C1(); + reg_load_bcd((char *)data_address, st0_ptr); + break; + case 024: /* fldcw */ + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_READ, data_address, 2); + control_word = get_fs_word((unsigned short *) data_address); + RE_ENTRANT_CHECK_ON; + if ( partial_status & ~control_word & CW_Exceptions ) + partial_status |= (SW_Summary | SW_Backward); + else + partial_status &= ~(SW_Summary | SW_Backward); +#ifdef PECULIAR_486 + control_word |= 0x40; /* An 80486 appears to always set this bit */ +#endif PECULIAR_486 + return 1; + case 025: /* fld m80real */ + clear_C1(); + reg_load_extended((long double *)data_address, st0_ptr); + break; + case 027: /* fild m64int */ + clear_C1(); + reg_load_int64((long long *)data_address, st0_ptr); + break; + case 030: /* fstenv m14/28byte */ + fstenv(addr_modes, (char *)data_address); + return 1; + case 032: /* fsave */ + fsave(addr_modes, (char *)data_address); + return 1; + case 033: /* fbstp m80dec */ + clear_C1(); + if ( reg_store_bcd((char *)data_address, st0_ptr) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 034: /* fstcw m16int */ + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,data_address,2); + put_fs_word(control_word, (short *) data_address); + RE_ENTRANT_CHECK_ON; + return 1; + case 035: /* fstp m80real */ + clear_C1(); + if ( reg_store_extended((long double *)data_address, st0_ptr) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 036: /* fstsw m2byte */ + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,data_address,2); + put_fs_word(status_word(),(short *) data_address); + RE_ENTRANT_CHECK_ON; + return 1; + case 037: /* fistp m64int */ + clear_C1(); + if ( reg_store_int64((long long *)data_address, st0_ptr) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + } + return 0; +} diff --git a/arch/i386/math-emu/mul_Xsig.S b/arch/i386/math-emu/mul_Xsig.S new file mode 100644 index 000000000..1d88d4466 --- /dev/null +++ b/arch/i386/math-emu/mul_Xsig.S @@ -0,0 +1,182 @@ +/*---------------------------------------------------------------------------+ + | mul_Xsig.S | + | | + | Multiply a 12 byte fixed point number by another fixed point number. | + | | + | Copyright (C) 1992,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | Call from C as: | + | void mul32_Xsig(Xsig *x, unsigned b) | + | | + | void mul64_Xsig(Xsig *x, unsigned long long *b) | + | | + | void mul_Xsig_Xsig(Xsig *x, unsigned *b) | + | | + | The result is neither rounded nor normalized, and the ls bit or so may | + | be wrong. | + | | + +---------------------------------------------------------------------------*/ + .file "mul_Xsig.S" + + +#include "fpu_asm.h" + +.text + .align 2,144 +.globl _mul32_Xsig +_mul32_Xsig: + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull %ecx /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull %ecx /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull %ecx /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%eax + movl %eax,(%esi) + movl -8(%ebp),%eax + movl %eax,4(%esi) + movl -4(%ebp),%eax + movl %eax,8(%esi) + + popl %esi + leave + ret + + + .align 2,144 +.globl _mul64_Xsig +_mul64_Xsig: + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull 4(%ecx) /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 4(%ecx) /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 4(%ecx) /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%eax + movl %eax,(%esi) + movl -8(%ebp),%eax + movl %eax,4(%esi) + movl -4(%ebp),%eax + movl %eax,8(%esi) + + popl %esi + leave + ret + + + + .align 2,144 +.globl _mul_Xsig_Xsig +_mul_Xsig_Xsig: + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull 8(%ecx) /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 4(%ecx) /* midl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 8(%ecx) /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 4(%ecx) /* midl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 8(%ecx) /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%edx + movl %edx,(%esi) + movl -8(%ebp),%edx + movl %edx,4(%esi) + movl -4(%ebp),%edx + movl %edx,8(%esi) + + popl %esi + leave + ret + diff --git a/arch/i386/math-emu/poly.h b/arch/i386/math-emu/poly.h new file mode 100644 index 000000000..397cb9e3e --- /dev/null +++ b/arch/i386/math-emu/poly.h @@ -0,0 +1,116 @@ +/*---------------------------------------------------------------------------+ + | poly.h | + | | + | Header file for the FPU-emu poly*.c source files. | + | | + | Copyright (C) 1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | Declarations and definitions for functions operating on Xsig (12-byte | + | extended-significand) quantities. | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _POLY_H +#define _POLY_H + +/* This 12-byte structure is used to improve the accuracy of computation + of transcendental functions. + Intended to be used to get results better than 8-byte computation + allows. 9-byte would probably be sufficient. + */ +typedef struct { + unsigned long lsw; + unsigned long midw; + unsigned long msw; +} Xsig; + +asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, + unsigned long long *result); +asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x, + const unsigned long long terms[], const int n); + +asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult); +asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult); +asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); + +asmlinkage void shr_Xsig(Xsig *, const int n); +asmlinkage int round_Xsig(Xsig *); +asmlinkage int norm_Xsig(Xsig *); +asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); + +/* Macro to extract the most significant 32 bits from a long long */ +#define LL_MSW(x) (((unsigned long *)&x)[1]) + +/* Macro to initialize an Xsig struct */ +#define MK_XSIG(a,b,c) { c, b, a } + +/* Macro to access the 8 ms bytes of an Xsig as a long long */ +#define XSIG_LL(x) (*(unsigned long long *)&x.midw) + + +/* + Need to run gcc with optimizations on to get these to + actually be in-line. + */ + +/* Multiply two fixed-point 32 bit numbers. */ +extern inline void mul_32_32(const unsigned long arg1, + const unsigned long arg2, + unsigned long *out) +{ + asm volatile ("movl %1,%%eax; mull %2; movl %%edx,%0" \ + :"=g" (*out) \ + :"g" (arg1), "g" (arg2) \ + :"ax","dx"); +} + + +/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ +extern inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2) +{ + asm volatile ("movl %1,%%edi; movl %2,%%esi; + movl (%%esi),%%eax; addl %%eax,(%%edi); + movl 4(%%esi),%%eax; adcl %%eax,4(%%edi); + movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);" + :"=g" (*dest):"g" (dest), "g" (x2) + :"ax","si","di"); +} + + +/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ +/* Note: the constraints in the asm statement didn't always work properly + with gcc 2.5.8. Changing from using edi to using ecx got around the + problem, but keep fingers crossed! */ +extern inline int add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp) +{ + asm volatile ("movl %2,%%ecx; movl %3,%%esi; + movl (%%esi),%%eax; addl %%eax,(%%ecx); + movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx); + movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx); + jnc 0f; + rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx) + movl %4,%%ecx; incl (%%ecx) + movl $1,%%eax; jmp 1f; + 0: xorl %%eax,%%eax; + 1:" + :"=g" (*exp), "=g" (*dest) + :"g" (dest), "g" (x2), "g" (exp) + :"cx","si","ax"); +} + + +/* Negate (subtract from 1.0) the 12 byte Xsig */ +/* This is faster in a loop on my 386 than using the "neg" instruction. */ +extern inline void negate_Xsig(Xsig *x) +{ + asm volatile("movl %1,%%esi; " + "xorl %%ecx,%%ecx; " + "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi); " + "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi); " + "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi); " + :"=g" (*x):"g" (x):"si","ax","cx"); +} + +#endif _POLY_H diff --git a/arch/i386/math-emu/poly_2xm1.c b/arch/i386/math-emu/poly_2xm1.c new file mode 100644 index 000000000..f7c585d60 --- /dev/null +++ b/arch/i386/math-emu/poly_2xm1.c @@ -0,0 +1,152 @@ +/*---------------------------------------------------------------------------+ + | poly_2xm1.c | + | | + | Function to compute 2^x-1 by a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "poly.h" + + +#define HIPOWER 11 +static const unsigned long long lterms[HIPOWER] = +{ + 0x0000000000000000LL, /* This term done separately as 12 bytes */ + 0xf5fdeffc162c7543LL, + 0x1c6b08d704a0bfa6LL, + 0x0276556df749cc21LL, + 0x002bb0ffcf14f6b8LL, + 0x0002861225ef751cLL, + 0x00001ffcbfcd5422LL, + 0x00000162c005d5f1LL, + 0x0000000da96ccb1bLL, + 0x0000000078d1b897LL, + 0x000000000422b029LL +}; + +static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194); + +/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0, + These numbers are 2^(1/4), 2^(1/2), and 2^(3/4) + */ +static const Xsig shiftterm0 = MK_XSIG(0, 0, 0); +static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318); +static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3); +static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9); + +static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1, + &shiftterm2, &shiftterm3 }; + + +/*--- poly_2xm1() -----------------------------------------------------------+ + | Requires an argument which is TW_Valid and < 1. | + +---------------------------------------------------------------------------*/ +int poly_2xm1(FPU_REG const *arg, FPU_REG *result) +{ + long int exponent, shift; + unsigned long long Xll; + Xsig accumulator, Denom, argSignif; + + + exponent = arg->exp - EXP_BIAS; + +#ifdef PARANOID + if ( (exponent >= 0) /* Don't want a |number| >= 1.0 */ + || (arg->tag != TW_Valid) ) + { + /* Number negative, too large, or not Valid. */ + EXCEPTION(EX_INTERNAL|0x127); + return 1; + } +#endif PARANOID + + argSignif.lsw = 0; + XSIG_LL(argSignif) = Xll = significand(arg); + + if ( exponent == -1 ) + { + shift = (argSignif.msw & 0x40000000) ? 3 : 2; + /* subtract 0.5 or 0.75 */ + exponent -= 2; + XSIG_LL(argSignif) <<= 2; + Xll <<= 2; + } + else if ( exponent == -2 ) + { + shift = 1; + /* subtract 0.25 */ + exponent--; + XSIG_LL(argSignif) <<= 1; + Xll <<= 1; + } + else + shift = 0; + + if ( exponent < -2 ) + { + /* Shift the argument right by the required places. */ + if ( shrx(&Xll, -2-exponent) >= 0x80000000U ) + Xll++; /* round up */ + } + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1); + mul_Xsig_Xsig(&accumulator, &argSignif); + shr_Xsig(&accumulator, 3); + + mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */ + add_two_Xsig(&accumulator, &argSignif, &exponent); + + if ( shift ) + { + /* The argument is large, use the identity: + f(x+a) = f(a) * (f(x) + 1) - 1; + */ + shr_Xsig(&accumulator, - exponent); + accumulator.msw |= 0x80000000; /* add 1.0 */ + mul_Xsig_Xsig(&accumulator, shiftterm[shift]); + accumulator.msw &= 0x3fffffff; /* subtract 1.0 */ + exponent = 1; + } + + if ( arg->sign != SIGN_POS ) + { + /* The argument is negative, use the identity: + f(-x) = -f(x) / (1 + f(x)) + */ + Denom.lsw = accumulator.lsw; + XSIG_LL(Denom) = XSIG_LL(accumulator); + if ( exponent < 0 ) + shr_Xsig(&Denom, - exponent); + else if ( exponent > 0 ) + { + /* exponent must be 1 here */ + XSIG_LL(Denom) <<= 1; + if ( Denom.lsw & 0x80000000 ) + XSIG_LL(Denom) |= 1; + (Denom.lsw) <<= 1; + } + Denom.msw |= 0x80000000; /* add 1.0 */ + div_Xsig(&accumulator, &Denom, &accumulator); + } + + /* Convert to 64 bit signed-compatible */ + exponent += round_Xsig(&accumulator); + + significand(result) = XSIG_LL(accumulator); + result->tag = TW_Valid; + result->exp = exponent + EXP_BIAS; + result->sign = arg->sign; + + return 0; + +} diff --git a/arch/i386/math-emu/poly_atan.c b/arch/i386/math-emu/poly_atan.c new file mode 100644 index 000000000..6edca625f --- /dev/null +++ b/arch/i386/math-emu/poly_atan.c @@ -0,0 +1,197 @@ +/*---------------------------------------------------------------------------+ + | poly_atan.c | + | | + | Compute the arctan of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" +#include "poly.h" + + +#define HIPOWERon 6 /* odd poly, negative terms */ +static const unsigned long long oddnegterms[HIPOWERon] = +{ + 0x0000000000000000LL, /* Dummy (not for - 1.0) */ + 0x015328437f756467LL, + 0x0005dda27b73dec6LL, + 0x0000226bf2bfb91aLL, + 0x000000ccc439c5f7LL, + 0x0000000355438407LL +} ; + +#define HIPOWERop 6 /* odd poly, positive terms */ +static const unsigned long long oddplterms[HIPOWERop] = +{ +/* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */ + 0x0db55a71875c9ac2LL, + 0x0029fce2d67880b0LL, + 0x0000dfd3908b4596LL, + 0x00000550fd61dab4LL, + 0x0000001c9422b3f9LL, + 0x000000003e3301e1LL +}; + +static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL; + +static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa); + +static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b); + + +/*--- poly_atan() -----------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_atan(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *result) +{ + char transformed, inverted, + sign1 = arg1->sign, sign2 = arg2->sign; + long int exponent, dummy_exp; + Xsig accumulator, Numer, Denom, accumulatore, argSignif, + argSq, argSqSq; + + + arg1->sign = arg2->sign = SIGN_POS; + if ( (compare(arg2) & ~COMP_Denormal) == COMP_A_lt_B ) + { + inverted = 1; + exponent = arg1->exp - arg2->exp; + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = significand(arg1); + XSIG_LL(Denom) = significand(arg2); + } + else + { + inverted = 0; + exponent = arg2->exp - arg1->exp; + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = significand(arg2); + XSIG_LL(Denom) = significand(arg1); + } + div_Xsig(&Numer, &Denom, &argSignif); + exponent += norm_Xsig(&argSignif); + + if ( (exponent >= -1) + || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) ) + { + /* The argument is greater than sqrt(2)-1 (=0.414213562...) */ + /* Convert the argument by an identity for atan */ + transformed = 1; + + if ( exponent >= 0 ) + { +#ifdef PARANOID + if ( !( (exponent == 0) && + (argSignif.lsw == 0) && (argSignif.midw == 0) && + (argSignif.msw == 0x80000000) ) ) + { + EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */ + return; + } +#endif PARANOID + argSignif.msw = 0; /* Make the transformed arg -> 0.0 */ + } + else + { + Numer.lsw = Denom.lsw = argSignif.lsw; + XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif); + + if ( exponent < -1 ) + shr_Xsig(&Numer, -1-exponent); + negate_Xsig(&Numer); + + shr_Xsig(&Denom, -exponent); + Denom.msw |= 0x80000000; + + div_Xsig(&Numer, &Denom, &argSignif); + + exponent = -1 + norm_Xsig(&argSignif); + } + } + else + { + transformed = 0; + } + + argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw; + argSq.msw = argSignif.msw; + mul_Xsig_Xsig(&argSq, &argSq); + + argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw; + mul_Xsig_Xsig(&argSqSq, &argSqSq); + + accumulatore.lsw = argSq.lsw; + XSIG_LL(accumulatore) = XSIG_LL(argSq); + + shr_Xsig(&argSq, 2*(-1-exponent-1)); + shr_Xsig(&argSqSq, 4*(-1-exponent-1)); + + /* Now have argSq etc with binary point at the left + .1xxxxxxxx */ + + /* Do the basic fixed point polynomial evaluation */ + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), + oddplterms, HIPOWERop-1); + mul64_Xsig(&accumulator, &XSIG_LL(argSq)); + negate_Xsig(&accumulator); + polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1); + negate_Xsig(&accumulator); + add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp); + + mul64_Xsig(&accumulatore, &denomterm); + shr_Xsig(&accumulatore, 1 + 2*(-1-exponent)); + accumulatore.msw |= 0x80000000; + + div_Xsig(&accumulator, &accumulatore, &accumulator); + + mul_Xsig_Xsig(&accumulator, &argSignif); + mul_Xsig_Xsig(&accumulator, &argSq); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &argSignif); + + if ( transformed ) + { + /* compute pi/4 - accumulator */ + shr_Xsig(&accumulator, -1-exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = -1; + } + + if ( inverted ) + { + /* compute pi/2 - accumulator */ + shr_Xsig(&accumulator, -exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = 0; + } + + if ( sign1 ) + { + /* compute pi - accumulator */ + shr_Xsig(&accumulator, 1 - exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = 1; + } + + exponent += round_Xsig(&accumulator); + significand(result) = XSIG_LL(accumulator); + result->exp = exponent + EXP_BIAS; + result->tag = TW_Valid; + result->sign = sign2; + +} diff --git a/arch/i386/math-emu/poly_l2.c b/arch/i386/math-emu/poly_l2.c new file mode 100644 index 000000000..1677f4aff --- /dev/null +++ b/arch/i386/math-emu/poly_l2.c @@ -0,0 +1,255 @@ +/*---------------------------------------------------------------------------+ + | poly_l2.c | + | | + | Compute the base 2 log of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "poly.h" + + + +static void log2_kernel(FPU_REG const *arg, + Xsig *accum_result, long int *expon); + + +/*--- poly_l2() -------------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + +---------------------------------------------------------------------------*/ +void poly_l2(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result) +{ + long int exponent, expon, expon_expon; + Xsig accumulator, expon_accum, yaccum; + char sign; + FPU_REG x; + + + exponent = arg->exp - EXP_BIAS; + + /* From arg, make a number > sqrt(2)/2 and < sqrt(2) */ + if ( arg->sigh > (unsigned)0xb504f334 ) + { + /* Treat as sqrt(2)/2 < arg < 1 */ + significand(&x) = - significand(arg); + x.sign = SIGN_NEG; + x.tag = TW_Valid; + x.exp = EXP_BIAS-1; + exponent++; + normalize(&x); + } + else + { + /* Treat as 1 <= arg < sqrt(2) */ + x.sigh = arg->sigh - 0x80000000; + x.sigl = arg->sigl; + x.sign = SIGN_POS; + x.tag = TW_Valid; + x.exp = EXP_BIAS; + normalize(&x); + } + + if ( x.tag == TW_Zero ) + { + expon = 0; + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + } + else + { + log2_kernel(&x, &accumulator, &expon); + } + + sign = exponent < 0; + if ( sign ) exponent = -exponent; + expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0; + if ( exponent ) + { + expon_expon = 31 + norm_Xsig(&expon_accum); + shr_Xsig(&accumulator, expon_expon - expon); + + if ( sign ^ (x.sign == SIGN_NEG) ) + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &expon_accum); + } + else + { + expon_expon = expon; + sign = x.sign; + } + + yaccum.lsw = 0; XSIG_LL(yaccum) = significand(y); + mul_Xsig_Xsig(&accumulator, &yaccum); + + expon_expon += round_Xsig(&accumulator); + + if ( accumulator.msw == 0 ) + { + reg_move(&CONST_Z, y); + } + else + { + result->exp = expon_expon + y->exp + 1; + significand(result) = XSIG_LL(accumulator); + result->tag = TW_Valid; /* set the tags to Valid */ + result->sign = sign ^ y->sign; + } + + return; +} + + +/*--- poly_l2p1() -----------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + | log2(x+1) | + +---------------------------------------------------------------------------*/ +int poly_l2p1(FPU_REG const *arg, FPU_REG const *y, FPU_REG *result) +{ + char sign; + long int exponent; + Xsig accumulator, yaccum; + + + sign = arg->sign; + + if ( arg->exp < EXP_BIAS ) + { + log2_kernel(arg, &accumulator, &exponent); + + yaccum.lsw = 0; + XSIG_LL(yaccum) = significand(y); + mul_Xsig_Xsig(&accumulator, &yaccum); + + exponent += round_Xsig(&accumulator); + + result->exp = exponent + y->exp + 1; + significand(result) = XSIG_LL(accumulator); + result->tag = TW_Valid; /* set the tags to Valid */ + result->sign = sign ^ y->sign; + + return 0; + } + else + { + /* The magnitude of arg is far too large. */ + reg_move(y, result); + if ( sign != SIGN_POS ) + { + /* Trying to get the log of a negative number. */ + return 1; + } + else + { + return 0; + } + } + +} + + + + +#undef HIPOWER +#define HIPOWER 10 +static const unsigned long long logterms[HIPOWER] = +{ + 0x2a8eca5705fc2ef0LL, + 0xf6384ee1d01febceLL, + 0x093bb62877cdf642LL, + 0x006985d8a9ec439bLL, + 0x0005212c4f55a9c8LL, + 0x00004326a16927f0LL, + 0x0000038d1d80a0e7LL, + 0x0000003141cc80c6LL, + 0x00000002b1668c9fLL, + 0x000000002c7a46aaLL +}; + +static const unsigned long leadterm = 0xb8000000; + + +/*--- log2_kernel() ---------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + | log2(x+1) | + +---------------------------------------------------------------------------*/ +static void log2_kernel(FPU_REG const *arg, Xsig *accum_result, + long int *expon) +{ + char sign; + long int exponent, adj; + unsigned long long Xsq; + Xsig accumulator, Numer, Denom, argSignif, arg_signif; + + sign = arg->sign; + + exponent = arg->exp - EXP_BIAS; + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg); + if ( sign == SIGN_POS ) + { + shr_Xsig(&Denom, 2 - (1 + exponent)); + Denom.msw |= 0x80000000; + div_Xsig(&Numer, &Denom, &argSignif); + } + else + { + shr_Xsig(&Denom, 1 - (1 + exponent)); + negate_Xsig(&Denom); + if ( Denom.msw & 0x80000000 ) + { + div_Xsig(&Numer, &Denom, &argSignif); + exponent ++; + } + else + { + /* Denom must be 1.0 */ + argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw; + argSignif.msw = Numer.msw; + } + } + +#ifndef PECULIAR_486 + /* Should check here that |local_arg| is within the valid range */ + if ( exponent >= -2 ) + { + if ( (exponent > -2) || + (argSignif.msw > (unsigned)0xafb0ccc0) ) + { + /* The argument is too large */ + } + } +#endif PECULIAR_486 + + arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif); + adj = norm_Xsig(&argSignif); + accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif); + mul_Xsig_Xsig(&accumulator, &accumulator); + shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj))); + Xsq = XSIG_LL(accumulator); + if ( accumulator.lsw & 0x80000000 ) + Xsq++; + + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + /* Do the basic fixed point polynomial evaluation */ + polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1); + + mul_Xsig_Xsig(&accumulator, &argSignif); + shr_Xsig(&accumulator, 6 - adj); + + mul32_Xsig(&arg_signif, leadterm); + add_two_Xsig(&accumulator, &arg_signif, &exponent); + + *expon = exponent + 1; + accum_result->lsw = accumulator.lsw; + accum_result->midw = accumulator.midw; + accum_result->msw = accumulator.msw; + +} diff --git a/arch/i386/math-emu/poly_sin.c b/arch/i386/math-emu/poly_sin.c new file mode 100644 index 000000000..03db5b6aa --- /dev/null +++ b/arch/i386/math-emu/poly_sin.c @@ -0,0 +1,408 @@ +/*---------------------------------------------------------------------------+ + | poly_sin.c | + | | + | Computation of an approximation of the sin function and the cosine | + | function by a polynomial. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "poly.h" + + +#define N_COEFF_P 4 +#define N_COEFF_N 4 + +static const unsigned long long pos_terms_l[N_COEFF_P] = +{ + 0xaaaaaaaaaaaaaaabLL, + 0x00d00d00d00cf906LL, + 0x000006b99159a8bbLL, + 0x000000000d7392e6LL +}; + +static const unsigned long long neg_terms_l[N_COEFF_N] = +{ + 0x2222222222222167LL, + 0x0002e3bc74aab624LL, + 0x0000000b09229062LL, + 0x00000000000c7973LL +}; + + + +#define N_COEFF_PH 4 +#define N_COEFF_NH 4 +static const unsigned long long pos_terms_h[N_COEFF_PH] = +{ + 0x0000000000000000LL, + 0x05b05b05b05b0406LL, + 0x000049f93edd91a9LL, + 0x00000000c9c9ed62LL +}; + +static const unsigned long long neg_terms_h[N_COEFF_NH] = +{ + 0xaaaaaaaaaaaaaa98LL, + 0x001a01a01a019064LL, + 0x0000008f76c68a77LL, + 0x0000000000d58f5eLL +}; + + +/*--- poly_sine() -----------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_sine(FPU_REG const *arg, FPU_REG *result) +{ + int exponent, echange; + Xsig accumulator, argSqrd, argTo4; + unsigned long fix_up, adj; + unsigned long long fixed_arg; + + +#ifdef PARANOID + if ( arg->tag == TW_Zero ) + { + /* Return 0.0 */ + reg_move(&CONST_Z, result); + return; + } +#endif PARANOID + + exponent = arg->exp - EXP_BIAS; + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + + /* Split into two ranges, for arguments below and above 1.0 */ + /* The boundary between upper and lower is approx 0.88309101259 */ + if ( (exponent < -1) || ((exponent == -1) && (arg->sigh <= 0xe21240aa)) ) + { + /* The argument is <= 0.88309101259 */ + + argSqrd.msw = arg->sigh; argSqrd.midw = arg->sigl; argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &significand(arg)); + shr_Xsig(&argSqrd, 2*(-1-exponent)); + argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, + N_COEFF_N-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, + N_COEFF_P-1); + + shr_Xsig(&accumulator, 2); /* Divide by four */ + accumulator.msw |= 0x80000000; /* Add 1.0 */ + + mul64_Xsig(&accumulator, &significand(arg)); + mul64_Xsig(&accumulator, &significand(arg)); + mul64_Xsig(&accumulator, &significand(arg)); + + /* Divide by four, FPU_REG compatible, etc */ + exponent = 3*exponent + EXP_BIAS; + + /* The minimum exponent difference is 3 */ + shr_Xsig(&accumulator, arg->exp - exponent); + + negate_Xsig(&accumulator); + XSIG_LL(accumulator) += significand(arg); + + echange = round_Xsig(&accumulator); + + result->exp = arg->exp + echange; + } + else + { + /* The argument is > 0.88309101259 */ + /* We use sin(arg) = cos(pi/2-arg) */ + + fixed_arg = significand(arg); + + if ( exponent == 0 ) + { + /* The argument is >= 1.0 */ + + /* Put the binary point at the left. */ + fixed_arg <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + fixed_arg = 0x921fb54442d18469LL - fixed_arg; + + XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &fixed_arg); + + XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, + N_COEFF_NH-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, + N_COEFF_PH-1); + negate_Xsig(&accumulator); + + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + + add_Xsig_Xsig(&accumulator, &argSqrd); + + shr_Xsig(&accumulator, 1); + + accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ + negate_Xsig(&accumulator); + + /* The basic computation is complete. Now fix the answer to + compensate for the error due to the approximation used for + pi/2 + */ + + /* This has an exponent of -65 */ + fix_up = 0x898cc517; + /* The fix-up needs to be improved for larger args */ + if ( argSqrd.msw & 0xffc00000 ) + { + /* Get about 32 bit precision in these: */ + mul_32_32(0x898cc517, argSqrd.msw, &adj); + fix_up -= adj/6; + } + mul_32_32(fix_up, LL_MSW(fixed_arg), &fix_up); + + adj = accumulator.lsw; /* temp save */ + accumulator.lsw -= fix_up; + if ( accumulator.lsw > adj ) + XSIG_LL(accumulator) --; + + echange = round_Xsig(&accumulator); + + result->exp = EXP_BIAS - 1 + echange; + } + + significand(result) = XSIG_LL(accumulator); + result->tag = TW_Valid; + result->sign = arg->sign; + +#ifdef PARANOID + if ( (result->exp >= EXP_BIAS) + && (significand(result) > 0x8000000000000000LL) ) + { + EXCEPTION(EX_INTERNAL|0x150); + } +#endif PARANOID + +} + + + +/*--- poly_cos() ------------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_cos(FPU_REG const *arg, FPU_REG *result) +{ + long int exponent, exp2, echange; + Xsig accumulator, argSqrd, fix_up, argTo4; + unsigned long adj; + unsigned long long fixed_arg; + + +#ifdef PARANOID + if ( arg->tag == TW_Zero ) + { + /* Return 1.0 */ + reg_move(&CONST_1, result); + return; + } + + if ( (arg->exp > EXP_BIAS) + || ((arg->exp == EXP_BIAS) + && (significand(arg) > 0xc90fdaa22168c234LL)) ) + { + EXCEPTION(EX_Invalid); + reg_move(&CONST_QNaN, result); + return; + } +#endif PARANOID + + exponent = arg->exp - EXP_BIAS; + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + + if ( (exponent < -1) || ((exponent == -1) && (arg->sigh <= 0xb00d6f54)) ) + { + /* arg is < 0.687705 */ + + argSqrd.msw = arg->sigh; argSqrd.midw = arg->sigl; argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &significand(arg)); + + if ( exponent < -1 ) + { + /* shift the argument right by the required places */ + shr_Xsig(&argSqrd, 2*(-1-exponent)); + } + + argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, + N_COEFF_NH-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, + N_COEFF_PH-1); + negate_Xsig(&accumulator); + + mul64_Xsig(&accumulator, &significand(arg)); + mul64_Xsig(&accumulator, &significand(arg)); + shr_Xsig(&accumulator, -2*(1+exponent)); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + + add_Xsig_Xsig(&accumulator, &argSqrd); + + shr_Xsig(&accumulator, 1); + + /* It doesn't matter if accumulator is all zero here, the + following code will work ok */ + negate_Xsig(&accumulator); + + if ( accumulator.lsw & 0x80000000 ) + XSIG_LL(accumulator) ++; + if ( accumulator.msw == 0 ) + { + /* The result is 1.0 */ + reg_move(&CONST_1, result); + } + else + { + significand(result) = XSIG_LL(accumulator); + + /* will be a valid positive nr with expon = -1 */ + *(short *)&(result->sign) = 0; + result->exp = EXP_BIAS - 1; + } + } + else + { + fixed_arg = significand(arg); + + if ( exponent == 0 ) + { + /* The argument is >= 1.0 */ + + /* Put the binary point at the left. */ + fixed_arg <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + fixed_arg = 0x921fb54442d18469LL - fixed_arg; + + exponent = -1; + exp2 = -1; + + /* A shift is needed here only for a narrow range of arguments, + i.e. for fixed_arg approx 2^-32, but we pick up more... */ + if ( !(LL_MSW(fixed_arg) & 0xffff0000) ) + { + fixed_arg <<= 16; + exponent -= 16; + exp2 -= 16; + } + + XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &fixed_arg); + + if ( exponent < -1 ) + { + /* shift the argument right by the required places */ + shr_Xsig(&argSqrd, 2*(-1-exponent)); + } + + argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, + N_COEFF_N-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, + N_COEFF_P-1); + + shr_Xsig(&accumulator, 2); /* Divide by four */ + accumulator.msw |= 0x80000000; /* Add 1.0 */ + + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + + /* Divide by four, FPU_REG compatible, etc */ + exponent = 3*exponent; + + /* The minimum exponent difference is 3 */ + shr_Xsig(&accumulator, exp2 - exponent); + + negate_Xsig(&accumulator); + XSIG_LL(accumulator) += fixed_arg; + + /* The basic computation is complete. Now fix the answer to + compensate for the error due to the approximation used for + pi/2 + */ + + /* This has an exponent of -65 */ + XSIG_LL(fix_up) = 0x898cc51701b839a2ll; + fix_up.lsw = 0; + + /* The fix-up needs to be improved for larger args */ + if ( argSqrd.msw & 0xffc00000 ) + { + /* Get about 32 bit precision in these: */ + mul_32_32(0x898cc517, argSqrd.msw, &adj); + fix_up.msw -= adj/2; + mul_32_32(0x898cc517, argTo4.msw, &adj); + fix_up.msw += adj/24; + } + + exp2 += norm_Xsig(&accumulator); + shr_Xsig(&accumulator, 1); /* Prevent overflow */ + exp2++; + shr_Xsig(&fix_up, 65 + exp2); + + add_Xsig_Xsig(&accumulator, &fix_up); + + echange = round_Xsig(&accumulator); + + result->exp = exp2 + EXP_BIAS + echange; + *(short *)&(result->sign) = 0; /* Is a valid positive nr */ + significand(result) = XSIG_LL(accumulator); + } + +#ifdef PARANOID + if ( (result->exp >= EXP_BIAS) + && (significand(result) > 0x8000000000000000LL) ) + { + EXCEPTION(EX_INTERNAL|0x151); + } +#endif PARANOID + +} diff --git a/arch/i386/math-emu/poly_tan.c b/arch/i386/math-emu/poly_tan.c new file mode 100644 index 000000000..d9b09e438 --- /dev/null +++ b/arch/i386/math-emu/poly_tan.c @@ -0,0 +1,213 @@ +/*---------------------------------------------------------------------------+ + | poly_tan.c | + | | + | Compute the tan of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "poly.h" + + +#define HiPOWERop 3 /* odd poly, positive terms */ +static const unsigned long long oddplterm[HiPOWERop] = +{ + 0x0000000000000000LL, + 0x0051a1cf08fca228LL, + 0x0000000071284ff7LL +}; + +#define HiPOWERon 2 /* odd poly, negative terms */ +static const unsigned long long oddnegterm[HiPOWERon] = +{ + 0x1291a9a184244e80LL, + 0x0000583245819c21LL +}; + +#define HiPOWERep 2 /* even poly, positive terms */ +static const unsigned long long evenplterm[HiPOWERep] = +{ + 0x0e848884b539e888LL, + 0x00003c7f18b887daLL +}; + +#define HiPOWERen 2 /* even poly, negative terms */ +static const unsigned long long evennegterm[HiPOWERen] = +{ + 0xf1f0200fd51569ccLL, + 0x003afb46105c4432LL +}; + +static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL; + + +/*--- poly_tan() ------------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_tan(FPU_REG const *arg, FPU_REG *result) +{ + long int exponent; + int invert; + Xsig argSq, argSqSq, accumulatoro, accumulatore, accum, + argSignif, fix_up; + unsigned long adj; + + exponent = arg->exp - EXP_BIAS; + +#ifdef PARANOID + if ( arg->sign != 0 ) /* Can't hack a number < 0.0 */ + { arith_invalid(result); return; } /* Need a positive number */ +#endif PARANOID + + /* Split the problem into two domains, smaller and larger than pi/4 */ + if ( (exponent == 0) || ((exponent == -1) && (arg->sigh > 0xc90fdaa2)) ) + { + /* The argument is greater than (approx) pi/4 */ + invert = 1; + accum.lsw = 0; + XSIG_LL(accum) = significand(arg); + + if ( exponent == 0 ) + { + /* The argument is >= 1.0 */ + /* Put the binary point at the left. */ + XSIG_LL(accum) <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum); + + argSignif.lsw = accum.lsw; + XSIG_LL(argSignif) = XSIG_LL(accum); + exponent = -1 + norm_Xsig(&argSignif); + } + else + { + invert = 0; + argSignif.lsw = 0; + XSIG_LL(accum) = XSIG_LL(argSignif) = significand(arg); + + if ( exponent < -1 ) + { + /* shift the argument right by the required places */ + if ( shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U ) + XSIG_LL(accum) ++; /* round up */ + } + } + + XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw; + mul_Xsig_Xsig(&argSq, &argSq); + XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw; + mul_Xsig_Xsig(&argSqSq, &argSqSq); + + /* Compute the negative terms for the numerator polynomial */ + accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0; + polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1); + mul_Xsig_Xsig(&accumulatoro, &argSq); + negate_Xsig(&accumulatoro); + /* Add the positive terms */ + polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1); + + + /* Compute the positive terms for the denominator polynomial */ + accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0; + polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1); + mul_Xsig_Xsig(&accumulatore, &argSq); + negate_Xsig(&accumulatore); + /* Add the negative terms */ + polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1); + /* Multiply by arg^2 */ + mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); + mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); + /* de-normalize and divide by 2 */ + shr_Xsig(&accumulatore, -2*(1+exponent) + 1); + negate_Xsig(&accumulatore); /* This does 1 - accumulator */ + + /* Now find the ratio. */ + if ( accumulatore.msw == 0 ) + { + /* accumulatoro must contain 1.0 here, (actually, 0) but it + really doesn't matter what value we use because it will + have negligible effect in later calculations + */ + XSIG_LL(accum) = 0x8000000000000000LL; + accum.lsw = 0; + } + else + { + div_Xsig(&accumulatoro, &accumulatore, &accum); + } + + /* Multiply by 1/3 * arg^3 */ + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &twothirds); + shr_Xsig(&accum, -2*(exponent+1)); + + /* tan(arg) = arg + accum */ + add_two_Xsig(&accum, &argSignif, &exponent); + + if ( invert ) + { + /* We now have the value of tan(pi_2 - arg) where pi_2 is an + approximation for pi/2 + */ + /* The next step is to fix the answer to compensate for the + error due to the approximation used for pi/2 + */ + + /* This is (approx) delta, the error in our approx for pi/2 + (see above). It has an exponent of -65 + */ + XSIG_LL(fix_up) = 0x898cc51701b839a2LL; + fix_up.lsw = 0; + + if ( exponent == 0 ) + adj = 0xffffffff; /* We want approx 1.0 here, but + this is close enough. */ + else if ( exponent > -30 ) + { + adj = accum.msw >> -(exponent+1); /* tan */ + mul_32_32(adj, adj, &adj); /* tan^2 */ + } + else + adj = 0; + mul_32_32(0x898cc517, adj, &adj); /* delta * tan^2 */ + + fix_up.msw += adj; + if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */ + { + /* Yes, we need to add an msb */ + shr_Xsig(&fix_up, 1); + fix_up.msw |= 0x80000000; + shr_Xsig(&fix_up, 64 + exponent); + } + else + shr_Xsig(&fix_up, 65 + exponent); + + add_two_Xsig(&accum, &fix_up, &exponent); + + /* accum now contains tan(pi/2 - arg). + Use tan(arg) = 1.0 / tan(pi/2 - arg) + */ + accumulatoro.lsw = accumulatoro.midw = 0; + accumulatoro.msw = 0x80000000; + div_Xsig(&accumulatoro, &accum, &accum); + exponent = - exponent - 1; + } + + /* Transfer the result */ + round_Xsig(&accum); + *(short *)&(result->sign) = 0; + significand(result) = XSIG_LL(accum); + result->exp = EXP_BIAS + exponent; + +} diff --git a/arch/i386/math-emu/polynom_Xsig.S b/arch/i386/math-emu/polynom_Xsig.S new file mode 100644 index 000000000..585221f96 --- /dev/null +++ b/arch/i386/math-emu/polynom_Xsig.S @@ -0,0 +1,137 @@ +/*---------------------------------------------------------------------------+ + | polynomial_Xsig.S | + | | + | Fixed point arithmetic polynomial evaluation. | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | Call from C as: | + | void polynomial_Xsig(Xsig *accum, unsigned long long x, | + | unsigned long long terms[], int n) | + | | + | Computes: | + | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | + | and adds the result to the 12 byte Xsig. | + | The terms[] are each 8 bytes, but all computation is performed to 12 byte | + | precision. | + | | + | This function must be used carefully: most overflow of intermediate | + | results is controlled, but overflow of the result is not. | + | | + +---------------------------------------------------------------------------*/ + .file "polynomial_Xsig.S" + +#include "fpu_asm.h" + + +#define TERM_SIZE $8 +#define SUM_MS -20(%ebp) /* sum ms long */ +#define SUM_MIDDLE -24(%ebp) /* sum middle long */ +#define SUM_LS -28(%ebp) /* sum ls long */ +#define ACCUM_MS -4(%ebp) /* accum ms long */ +#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ +#define ACCUM_LS -12(%ebp) /* accum ls long */ +#define OVERFLOWED -16(%ebp) /* addition overflow flag */ + +.text + .align 2,144 +.globl _polynomial_Xsig +_polynomial_Xsig: + pushl %ebp + movl %esp,%ebp + subl $32,%esp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM2,%esi /* x */ + movl PARAM3,%edi /* terms */ + + movl TERM_SIZE,%eax + mull PARAM4 /* n */ + addl %eax,%edi + + movl 4(%edi),%edx /* terms[n] */ + movl %edx,SUM_MS + movl (%edi),%edx /* terms[n] */ + movl %edx,SUM_MIDDLE + xor %eax,%eax + movl %eax,SUM_LS + movb %al,OVERFLOWED + + subl TERM_SIZE,%edi + decl PARAM4 + js L_accum_done + +L_accum_loop: + xor %eax,%eax + movl %eax,ACCUM_MS + movl %eax,ACCUM_MIDDLE + + movl SUM_MIDDLE,%eax + mull (%esi) /* x ls long */ + movl %edx,ACCUM_LS + + movl SUM_MIDDLE,%eax + mull 4(%esi) /* x ms long */ + addl %eax,ACCUM_LS + adcl %edx,ACCUM_MIDDLE + adcl $0,ACCUM_MS + + movl SUM_MS,%eax + mull (%esi) /* x ls long */ + addl %eax,ACCUM_LS + adcl %edx,ACCUM_MIDDLE + adcl $0,ACCUM_MS + + movl SUM_MS,%eax + mull 4(%esi) /* x ms long */ + addl %eax,ACCUM_MIDDLE + adcl %edx,ACCUM_MS + + testb $0xff,OVERFLOWED + jz L_no_overflow + + movl (%esi),%eax + addl %eax,ACCUM_MIDDLE + movl 4(%esi),%eax + adcl %eax,ACCUM_MS /* This could overflow too */ + +L_no_overflow: + +/* + * Now put the sum of next term and the accumulator + * into the sum register + */ + movl ACCUM_LS,%eax + addl (%edi),%eax /* term ls long */ + movl %eax,SUM_LS + movl ACCUM_MIDDLE,%eax + adcl (%edi),%eax /* term ls long */ + movl %eax,SUM_MIDDLE + movl ACCUM_MS,%eax + adcl 4(%edi),%eax /* term ms long */ + movl %eax,SUM_MS + sbbb %al,%al + movb %al,OVERFLOWED /* Used in the next iteration */ + + subl TERM_SIZE,%edi + decl PARAM4 + jns L_accum_loop + +L_accum_done: + movl PARAM1,%edi /* accum */ + movl SUM_LS,%eax + addl %eax,(%edi) + movl SUM_MIDDLE,%eax + adcl %eax,4(%edi) + movl SUM_MS,%eax + adcl %eax,8(%edi) + + popl %ebx + popl %edi + popl %esi + leave + ret diff --git a/arch/i386/math-emu/reg_add_sub.c b/arch/i386/math-emu/reg_add_sub.c new file mode 100644 index 000000000..d70889b40 --- /dev/null +++ b/arch/i386/math-emu/reg_add_sub.c @@ -0,0 +1,318 @@ +/*---------------------------------------------------------------------------+ + | reg_add_sub.c | + | | + | Functions to add or subtract two registers and put the result in a third. | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | For each function, the destination may be any FPU_REG, including one of | + | the source FPU_REGs. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "fpu_system.h" + + +int reg_add(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest, int control_w) +{ + char saved_sign = dest->sign; + int diff; + + if ( !(a->tag | b->tag) ) + { + /* Both registers are valid */ + if (!(a->sign ^ b->sign)) + { + /* signs are the same */ + dest->sign = a->sign; + if ( reg_u_add(a, b, dest, control_w) ) + { + dest->sign = saved_sign; + return 1; + } + return 0; + } + + /* The signs are different, so do a subtraction */ + diff = a->exp - b->exp; + if (!diff) + { + diff = a->sigh - b->sigh; /* Works only if ms bits are identical */ + if (!diff) + { + diff = a->sigl > b->sigl; + if (!diff) + diff = -(a->sigl < b->sigl); + } + } + + if (diff > 0) + { + dest->sign = a->sign; + if ( reg_u_sub(a, b, dest, control_w) ) + { + dest->sign = saved_sign; + return 1; + } + } + else if ( diff == 0 ) + { +#ifdef DENORM_OPERAND + if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(&CONST_Z, dest); + /* sign depends upon rounding mode */ + dest->sign = ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG; + } + else + { + dest->sign = b->sign; + if ( reg_u_sub(b, a, dest, control_w) ) + { + dest->sign = saved_sign; + return 1; + } + } + return 0; + } + else + { + if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) ) + { return real_2op_NaN(a, b, dest); } + else if (a->tag == TW_Zero) + { + if (b->tag == TW_Zero) + { + char different_signs = a->sign ^ b->sign; + /* Both are zero, result will be zero. */ + reg_move(a, dest); + if (different_signs) + { + /* Signs are different. */ + /* Sign of answer depends upon rounding mode. */ + dest->sign = ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG; + } + } + else + { +#ifdef DENORM_OPERAND + if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(b, dest); + } + return 0; + } + else if (b->tag == TW_Zero) + { +#ifdef DENORM_OPERAND + if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(a, dest); return 0; + } + else if (a->tag == TW_Infinity) + { + if (b->tag != TW_Infinity) + { +#ifdef DENORM_OPERAND + if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(a, dest); return 0; + } + if (a->sign == b->sign) + { + /* They are both + or - infinity */ + reg_move(a, dest); return 0; + } + return arith_invalid(dest); /* Infinity-Infinity is undefined. */ + } + else if (b->tag == TW_Infinity) + { +#ifdef DENORM_OPERAND + if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(b, dest); return 0; + } + } +#ifdef PARANOID + EXCEPTION(EX_INTERNAL|0x101); +#endif + return 1; +} + + +/* Subtract b from a. (a-b) -> dest */ +int reg_sub(FPU_REG const *a, FPU_REG const *b, FPU_REG *dest, int control_w) +{ + char saved_sign = dest->sign; + int diff; + + if ( !(a->tag | b->tag) ) + { + /* Both registers are valid */ + diff = a->exp - b->exp; + if (!diff) + { + diff = a->sigh - b->sigh; /* Works only if ms bits are identical */ + if (!diff) + { + diff = a->sigl > b->sigl; + if (!diff) + diff = -(a->sigl < b->sigl); + } + } + + switch (a->sign*2 + b->sign) + { + case 0: /* P - P */ + case 3: /* N - N */ + if (diff > 0) + { + /* |a| > |b| */ + dest->sign = a->sign; + if ( reg_u_sub(a, b, dest, control_w) ) + { + dest->sign = saved_sign; + return 1; + } + return 0; + } + else if ( diff == 0 ) + { +#ifdef DENORM_OPERAND + if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(&CONST_Z, dest); + /* sign depends upon rounding mode */ + dest->sign = ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG; + } + else + { + dest->sign = a->sign ^ SIGN_POS^SIGN_NEG; + if ( reg_u_sub(b, a, dest, control_w) ) + { + dest->sign = saved_sign; + return 1; + } + } + break; + case 1: /* P - N */ + dest->sign = SIGN_POS; + if ( reg_u_add(a, b, dest, control_w) ) + { + dest->sign = saved_sign; + return 1; + } + break; + case 2: /* N - P */ + dest->sign = SIGN_NEG; + if ( reg_u_add(a, b, dest, control_w) ) + { + dest->sign = saved_sign; + return 1; + } + break; + } + return 0; + } + else + { + if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) ) + { return real_2op_NaN(b, a, dest); } + else if (b->tag == TW_Zero) + { + if (a->tag == TW_Zero) + { + char same_signs = !(a->sign ^ b->sign); + /* Both are zero, result will be zero. */ + reg_move(a, dest); /* Answer for different signs. */ + if (same_signs) + { + /* Sign depends upon rounding mode */ + dest->sign = ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG; + } + } + else + { +#ifdef DENORM_OPERAND + if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(a, dest); + } + return 0; + } + else if (a->tag == TW_Zero) + { +#ifdef DENORM_OPERAND + if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(b, dest); + dest->sign ^= SIGN_POS^SIGN_NEG; + return 0; + } + else if (a->tag == TW_Infinity) + { + if (b->tag != TW_Infinity) + { +#ifdef DENORM_OPERAND + if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(a, dest); return 0; + } + /* Both args are Infinity */ + if (a->sign == b->sign) + { + /* Infinity-Infinity is undefined. */ + return arith_invalid(dest); + } + reg_move(a, dest); + return 0; + } + else if (b->tag == TW_Infinity) + { +#ifdef DENORM_OPERAND + if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(b, dest); + dest->sign ^= SIGN_POS^SIGN_NEG; + return 0; + } + } +#ifdef PARANOID + EXCEPTION(EX_INTERNAL|0x110); +#endif + return 1; +} + diff --git a/arch/i386/math-emu/reg_compare.c b/arch/i386/math-emu/reg_compare.c new file mode 100644 index 000000000..eb4a1fa99 --- /dev/null +++ b/arch/i386/math-emu/reg_compare.c @@ -0,0 +1,378 @@ +/*---------------------------------------------------------------------------+ + | reg_compare.c | + | | + | Compare two floating point registers | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | compare() is the core FPU_REG comparison function | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "status_w.h" + + +int compare(FPU_REG const *b) +{ + int diff; + char st0_tag; + FPU_REG *st0_ptr; + + st0_ptr = &st(0); + st0_tag = st0_ptr->tag; + + if ( st0_tag | b->tag ) + { + if ( st0_tag == TW_Zero ) + { + if ( b->tag == TW_Zero ) return COMP_A_eq_B; + if ( b->tag == TW_Valid ) + { + return ((b->sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) +#ifdef DENORM_OPERAND + | ((b->exp <= EXP_UNDER) ? + COMP_Denormal : 0) +#endif DENORM_OPERAND + ; + } + } + else if ( b->tag == TW_Zero ) + { + if ( st0_tag == TW_Valid ) + { + return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B + : COMP_A_lt_B) +#ifdef DENORM_OPERAND + | ((st0_ptr->exp <= EXP_UNDER ) + ? COMP_Denormal : 0 ) +#endif DENORM_OPERAND + ; + } + } + + if ( st0_tag == TW_Infinity ) + { + if ( (b->tag == TW_Valid) || (b->tag == TW_Zero) ) + { + return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B + : COMP_A_lt_B) +#ifdef DENORM_OPERAND + | (((b->tag == TW_Valid) && (b->exp <= EXP_UNDER)) ? + COMP_Denormal : 0 ) +#endif DENORM_OPERAND +; + } + else if ( b->tag == TW_Infinity ) + { + /* The 80486 book says that infinities can be equal! */ + return (st0_ptr->sign == b->sign) ? COMP_A_eq_B : + ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); + } + /* Fall through to the NaN code */ + } + else if ( b->tag == TW_Infinity ) + { + if ( (st0_tag == TW_Valid) || (st0_tag == TW_Zero) ) + { + return ((b->sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) +#ifdef DENORM_OPERAND + | (((st0_tag == TW_Valid) + && (st0_ptr->exp <= EXP_UNDER)) ? + COMP_Denormal : 0) +#endif DENORM_OPERAND + ; + } + /* Fall through to the NaN code */ + } + + /* The only possibility now should be that one of the arguments + is a NaN */ + if ( (st0_tag == TW_NaN) || (b->tag == TW_NaN) ) + { + if ( ((st0_tag == TW_NaN) && !(st0_ptr->sigh & 0x40000000)) + || ((b->tag == TW_NaN) && !(b->sigh & 0x40000000)) ) + /* At least one arg is a signaling NaN */ + return COMP_No_Comp | COMP_SNaN | COMP_NaN; + else + /* Neither is a signaling NaN */ + return COMP_No_Comp | COMP_NaN; + } + + EXCEPTION(EX_Invalid); + } + +#ifdef PARANOID + if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid); + if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid); +#endif PARANOID + + + if (st0_ptr->sign != b->sign) + { + return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) +#ifdef DENORM_OPERAND + | + ( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ? + COMP_Denormal : 0) +#endif DENORM_OPERAND + ; + } + + diff = st0_ptr->exp - b->exp; + if ( diff == 0 ) + { + diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are + identical */ + if ( diff == 0 ) + { + diff = st0_ptr->sigl > b->sigl; + if ( diff == 0 ) + diff = -(st0_ptr->sigl < b->sigl); + } + } + + if ( diff > 0 ) + { + return ((st0_ptr->sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) +#ifdef DENORM_OPERAND + | + ( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ? + COMP_Denormal : 0) +#endif DENORM_OPERAND + ; + } + if ( diff < 0 ) + { + return ((st0_ptr->sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) +#ifdef DENORM_OPERAND + | + ( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ? + COMP_Denormal : 0) +#endif DENORM_OPERAND + ; + } + + return COMP_A_eq_B +#ifdef DENORM_OPERAND + | + ( ((st0_ptr->exp <= EXP_UNDER) || (b->exp <= EXP_UNDER)) ? + COMP_Denormal : 0) +#endif DENORM_OPERAND + ; + +} + + +/* This function requires that st(0) is not empty */ +int compare_st_data(FPU_REG const *loaded_data) +{ + int f, c; + + c = compare(loaded_data); + + if (c & COMP_NaN) + { + EXCEPTION(EX_Invalid); + f = SW_C3 | SW_C2 | SW_C0; + } + else + switch (c & 7) + { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x121); + f = SW_C3 | SW_C2 | SW_C0; + break; +#endif PARANOID + } + setcc(f); + if (c & COMP_Denormal) + { + return denormal_operand(); + } + return 0; +} + + +static int compare_st_st(int nr) +{ + int f, c; + + if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) + { + setcc(SW_C3 | SW_C2 | SW_C0); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + c = compare(&st(nr)); + if (c & COMP_NaN) + { + setcc(SW_C3 | SW_C2 | SW_C0); + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } + else + switch (c & 7) + { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x122); + f = SW_C3 | SW_C2 | SW_C0; + break; +#endif PARANOID + } + setcc(f); + if (c & COMP_Denormal) + { + return denormal_operand(); + } + return 0; +} + + +static int compare_u_st_st(int nr) +{ + int f, c; + + if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) + { + setcc(SW_C3 | SW_C2 | SW_C0); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + c = compare(&st(nr)); + if (c & COMP_NaN) + { + setcc(SW_C3 | SW_C2 | SW_C0); + if (c & COMP_SNaN) /* This is the only difference between + un-ordered and ordinary comparisons */ + { + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } + return 0; + } + else + switch (c & 7) + { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x123); + f = SW_C3 | SW_C2 | SW_C0; + break; +#endif PARANOID + } + setcc(f); + if (c & COMP_Denormal) + { + return denormal_operand(); + } + return 0; +} + +/*---------------------------------------------------------------------------*/ + +void fcom_st() +{ + /* fcom st(i) */ + compare_st_st(FPU_rm); +} + + +void fcompst() +{ + /* fcomp st(i) */ + if ( !compare_st_st(FPU_rm) ) + pop(); +} + + +void fcompp() +{ + /* fcompp */ + if (FPU_rm != 1) + { + FPU_illegal(); + return; + } + if ( !compare_st_st(1) ) + poppop(); +} + + +void fucom_() +{ + /* fucom st(i) */ + compare_u_st_st(FPU_rm); + +} + + +void fucomp() +{ + /* fucomp st(i) */ + if ( !compare_u_st_st(FPU_rm) ) + pop(); +} + + +void fucompp() +{ + /* fucompp */ + if (FPU_rm == 1) + { + if ( !compare_u_st_st(1) ) + poppop(); + } + else + FPU_illegal(); +} diff --git a/arch/i386/math-emu/reg_constant.c b/arch/i386/math-emu/reg_constant.c new file mode 100644 index 000000000..c1981ce24 --- /dev/null +++ b/arch/i386/math-emu/reg_constant.c @@ -0,0 +1,116 @@ +/*---------------------------------------------------------------------------+ + | reg_constant.c | + | | + | All of the constant FPU_REGs | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "reg_constant.h" + + +FPU_REG const CONST_1 = { SIGN_POS, TW_Valid, EXP_BIAS, + 0x00000000, 0x80000000 }; +FPU_REG const CONST_2 = { SIGN_POS, TW_Valid, EXP_BIAS+1, + 0x00000000, 0x80000000 }; +FPU_REG const CONST_HALF = { SIGN_POS, TW_Valid, EXP_BIAS-1, + 0x00000000, 0x80000000 }; +FPU_REG const CONST_L2T = { SIGN_POS, TW_Valid, EXP_BIAS+1, + 0xcd1b8afe, 0xd49a784b }; +FPU_REG const CONST_L2E = { SIGN_POS, TW_Valid, EXP_BIAS, + 0x5c17f0bc, 0xb8aa3b29 }; +FPU_REG const CONST_PI = { SIGN_POS, TW_Valid, EXP_BIAS+1, + 0x2168c235, 0xc90fdaa2 }; +FPU_REG const CONST_PI2 = { SIGN_POS, TW_Valid, EXP_BIAS, + 0x2168c235, 0xc90fdaa2 }; +FPU_REG const CONST_PI4 = { SIGN_POS, TW_Valid, EXP_BIAS-1, + 0x2168c235, 0xc90fdaa2 }; +FPU_REG const CONST_LG2 = { SIGN_POS, TW_Valid, EXP_BIAS-2, + 0xfbcff799, 0x9a209a84 }; +FPU_REG const CONST_LN2 = { SIGN_POS, TW_Valid, EXP_BIAS-1, + 0xd1cf79ac, 0xb17217f7 }; + +/* Extra bits to take pi/2 to more than 128 bits precision. */ +FPU_REG const CONST_PI2extra = { SIGN_NEG, TW_Valid, EXP_BIAS-66, + 0xfc8f8cbb, 0xece675d1 }; + +/* Only the sign (and tag) is used in internal zeroes */ +FPU_REG const CONST_Z = { SIGN_POS, TW_Zero, EXP_UNDER, 0x0, 0x0 }; + +/* Only the sign and significand (and tag) are used in internal NaNs */ +/* The 80486 never generates one of these +FPU_REG const CONST_SNAN = { SIGN_POS, TW_NaN, EXP_OVER, 0x00000001, 0x80000000 }; + */ +/* This is the real indefinite QNaN */ +FPU_REG const CONST_QNaN = { SIGN_NEG, TW_NaN, EXP_OVER, 0x00000000, 0xC0000000 }; + +/* Only the sign (and tag) is used in internal infinities */ +FPU_REG const CONST_INF = { SIGN_POS, TW_Infinity, EXP_OVER, 0x00000000, 0x80000000 }; + + + +static void fld_const(FPU_REG const *c) +{ + FPU_REG *st_new_ptr; + + if ( STACK_OVERFLOW ) + { + stack_overflow(); + return; + } + push(); + reg_move(c, st_new_ptr); + clear_C1(); +} + + +static void fld1(void) +{ + fld_const(&CONST_1); +} + +static void fldl2t(void) +{ + fld_const(&CONST_L2T); +} + +static void fldl2e(void) +{ + fld_const(&CONST_L2E); +} + +static void fldpi(void) +{ + fld_const(&CONST_PI); +} + +static void fldlg2(void) +{ + fld_const(&CONST_LG2); +} + +static void fldln2(void) +{ + fld_const(&CONST_LN2); +} + +static void fldz(void) +{ + fld_const(&CONST_Z); +} + +static FUNC constants_table[] = { + fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, FPU_illegal +}; + +void fconst(void) +{ + (constants_table[FPU_rm])(); +} diff --git a/arch/i386/math-emu/reg_constant.h b/arch/i386/math-emu/reg_constant.h new file mode 100644 index 000000000..b7db97e34 --- /dev/null +++ b/arch/i386/math-emu/reg_constant.h @@ -0,0 +1,31 @@ +/*---------------------------------------------------------------------------+ + | reg_constant.h | + | | + | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _REG_CONSTANT_H_ +#define _REG_CONSTANT_H_ + +#include "fpu_emu.h" + +extern FPU_REG const CONST_1; +extern FPU_REG const CONST_2; +extern FPU_REG const CONST_HALF; +extern FPU_REG const CONST_L2T; +extern FPU_REG const CONST_L2E; +extern FPU_REG const CONST_PI; +extern FPU_REG const CONST_PI2; +extern FPU_REG const CONST_PI2extra; +extern FPU_REG const CONST_PI4; +extern FPU_REG const CONST_LG2; +extern FPU_REG const CONST_LN2; +extern FPU_REG const CONST_Z; +extern FPU_REG const CONST_PINF; +extern FPU_REG const CONST_INF; +extern FPU_REG const CONST_MINF; +extern FPU_REG const CONST_QNaN; + +#endif _REG_CONSTANT_H_ diff --git a/arch/i386/math-emu/reg_div.S b/arch/i386/math-emu/reg_div.S new file mode 100644 index 000000000..2fbc5f7c4 --- /dev/null +++ b/arch/i386/math-emu/reg_div.S @@ -0,0 +1,251 @@ + .file "reg_div.S" +/*---------------------------------------------------------------------------+ + | reg_div.S | + | | + | Divide one FPU_REG by another and put the result in a destination FPU_REG.| + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | Call from C as: | + | void reg_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, | + | unsigned int control_word) | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_asm.h" + + +.text + .align 2 + +.globl _reg_div +_reg_div: + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp /* Needed by divide_kernel */ +#endif NON_REENTRANT_FPU + + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi + movl PARAM2,%ebx + movl PARAM3,%edi + + movb TAG(%esi),%al + orb TAG(%ebx),%al + + jne L_div_special /* Not (both numbers TW_Valid) */ + +#ifdef DENORM_OPERAND +/* Check for denormals */ + cmpl EXP_UNDER,EXP(%esi) + jg xL_arg1_not_denormal + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit + +xL_arg1_not_denormal: + cmpl EXP_UNDER,EXP(%ebx) + jg xL_arg2_not_denormal + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit + +xL_arg2_not_denormal: +#endif DENORM_OPERAND + +/* Both arguments are TW_Valid */ + movb TW_Valid,TAG(%edi) + + movb SIGN(%esi),%cl + cmpb %cl,SIGN(%ebx) + setne (%edi) /* Set the sign, requires SIGN_NEG=1, SIGN_POS=0 */ + + movl EXP(%esi),%edx + movl EXP(%ebx),%eax + subl %eax,%edx + addl EXP_BIAS,%edx + movl %edx,EXP(%edi) + + jmp _divide_kernel + + +/*-----------------------------------------------------------------------*/ +L_div_special: + cmpb TW_NaN,TAG(%esi) /* A NaN with anything to give NaN */ + je L_arg1_NaN + + cmpb TW_NaN,TAG(%ebx) /* A NaN with anything to give NaN */ + jne L_no_NaN_arg + +/* Operations on NaNs */ +L_arg1_NaN: +L_arg2_NaN: + pushl %edi /* Destination */ + pushl %esi + pushl %ebx /* Ordering is important here */ + call _real_2op_NaN + jmp LDiv_exit + +/* Invalid operations */ +L_zero_zero: +L_inf_inf: + pushl %edi /* Destination */ + call _arith_invalid /* 0/0 or Infinity/Infinity */ + jmp LDiv_exit + +L_no_NaN_arg: + cmpb TW_Infinity,TAG(%esi) + jne L_arg1_not_inf + + cmpb TW_Infinity,TAG(%ebx) + je L_inf_inf /* invalid operation */ + + cmpb TW_Valid,TAG(%ebx) + je L_inf_valid + +#ifdef PARANOID + /* arg2 must be zero or valid */ + cmpb TW_Zero,TAG(%ebx) + ja L_unknown_tags +#endif PARANOID + + /* Note that p16-9 says that infinity/0 returns infinity */ + jmp L_copy_arg1 /* Answer is Inf */ + +L_inf_valid: +#ifdef DENORM_OPERAND + cmpl EXP_UNDER,EXP(%ebx) + jg L_copy_arg1 /* Answer is Inf */ + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit +#endif DENORM_OPERAND + + jmp L_copy_arg1 /* Answer is Inf */ + +L_arg1_not_inf: + cmpb TW_Zero,TAG(%ebx) /* Priority to div-by-zero error */ + jne L_arg2_not_zero + + cmpb TW_Zero,TAG(%esi) + je L_zero_zero /* invalid operation */ + +#ifdef PARANOID + /* arg1 must be valid */ + cmpb TW_Valid,TAG(%esi) + ja L_unknown_tags +#endif PARANOID + +/* Division by zero error */ + pushl %edi /* destination */ + movb SIGN(%esi),%al + xorb SIGN(%ebx),%al + pushl %eax /* lower 8 bits have the sign */ + call _divide_by_zero + jmp LDiv_exit + +L_arg2_not_zero: + cmpb TW_Infinity,TAG(%ebx) + jne L_arg2_not_inf + +#ifdef DENORM_OPERAND + cmpb TW_Valid,TAG(%esi) + jne L_return_zero + + cmpl EXP_UNDER,EXP(%esi) + jg L_return_zero /* Answer is zero */ + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit +#endif DENORM_OPERAND + + jmp L_return_zero /* Answer is zero */ + +L_arg2_not_inf: + +#ifdef PARANOID + cmpb TW_Zero,TAG(%esi) + jne L_unknown_tags +#endif PARANOID + + /* arg1 is zero, arg2 is not Infinity or a NaN */ + +#ifdef DENORM_OPERAND + cmpl EXP_UNDER,EXP(%ebx) + jg L_copy_arg1 /* Answer is zero */ + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit +#endif DENORM_OPERAND + +L_copy_arg1: + movb TAG(%esi),%ax + movb %ax,TAG(%edi) + movl EXP(%esi),%eax + movl %eax,EXP(%edi) + movl SIGL(%esi),%eax + movl %eax,SIGL(%edi) + movl SIGH(%esi),%eax + movl %eax,SIGH(%edi) + +LDiv_set_result_sign: + movb SIGN(%esi),%cl + cmpb %cl,SIGN(%ebx) + jne LDiv_negative_result + + movb SIGN_POS,SIGN(%edi) + xorl %eax,%eax /* Valid result */ + jmp LDiv_exit + +LDiv_negative_result: + movb SIGN_NEG,SIGN(%edi) + xorl %eax,%eax /* Valid result */ + +LDiv_exit: +#ifndef NON_REENTRANT_FPU + leal -40(%ebp),%esp +#else + leal -12(%ebp),%esp +#endif NON_REENTRANT_FPU + + popl %ebx + popl %edi + popl %esi + leave + ret + + +L_return_zero: + xorl %eax,%eax + movl %eax,SIGH(%edi) + movl %eax,SIGL(%edi) + movl EXP_UNDER,EXP(%edi) + movb TW_Zero,TAG(%edi) + jmp LDiv_set_result_sign + +#ifdef PARANOID +L_unknown_tags: + pushl EX_INTERNAL | 0x208 + call EXCEPTION + + /* Generate a NaN for unknown tags */ + movl _CONST_QNaN,%eax + movl %eax,(%edi) + movl _CONST_QNaN+4,%eax + movl %eax,SIGL(%edi) + movl _CONST_QNaN+8,%eax + movl %eax,SIGH(%edi) + jmp LDiv_exit /* %eax is nz */ +#endif PARANOID diff --git a/arch/i386/math-emu/reg_ld_str.c b/arch/i386/math-emu/reg_ld_str.c new file mode 100644 index 000000000..efec9e010 --- /dev/null +++ b/arch/i386/math-emu/reg_ld_str.c @@ -0,0 +1,1438 @@ +/*---------------------------------------------------------------------------+ + | reg_ld_str.c | + | | + | All of the functions which transfer data between user memory and FPU_REGs.| + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include <asm/segment.h> + +#include "fpu_system.h" +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "status_w.h" + + +#define EXTENDED_Ebias 0x3fff +#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */ + +#define DOUBLE_Emax 1023 /* largest valid exponent */ +#define DOUBLE_Ebias 1023 +#define DOUBLE_Emin (-1022) /* smallest valid exponent */ + +#define SINGLE_Emax 127 /* largest valid exponent */ +#define SINGLE_Ebias 127 +#define SINGLE_Emin (-126) /* smallest valid exponent */ + +static void write_to_extended(FPU_REG *rp, char *d); + + +/* Get a long double from user memory */ +int reg_load_extended(long double *s, FPU_REG *loaded_data) +{ + unsigned long sigl, sigh, exp; + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_READ, s, 10); + sigl = get_fs_long((unsigned long *) s); + sigh = get_fs_long(1 + (unsigned long *) s); + exp = get_fs_word(4 + (unsigned short *) s); + RE_ENTRANT_CHECK_ON; + + loaded_data->tag = TW_Valid; /* Default */ + loaded_data->sigl = sigl; + loaded_data->sigh = sigh; + if (exp & 0x8000) + loaded_data->sign = SIGN_NEG; + else + loaded_data->sign = SIGN_POS; + exp &= 0x7fff; + loaded_data->exp = exp - EXTENDED_Ebias + EXP_BIAS; + + if ( exp == 0 ) + { + if ( !(sigh | sigl) ) + { + loaded_data->tag = TW_Zero; + return 0; + } + /* The number is a de-normal or pseudodenormal. */ + if (sigh & 0x80000000) + { + /* Is a pseudodenormal. */ + /* Convert it for internal use. */ + /* This is non-80486 behaviour because the number + loses its 'denormal' identity. */ + loaded_data->exp++; + return 1; + } + else + { + /* Is a denormal. */ + /* Convert it for internal use. */ + loaded_data->exp++; + normalize_nuo(loaded_data); + return 0; + } + } + else if ( exp == 0x7fff ) + { + if ( !((sigh ^ 0x80000000) | sigl) ) + { + /* Matches the bit pattern for Infinity. */ + loaded_data->exp = EXP_Infinity; + loaded_data->tag = TW_Infinity; + return 0; + } + + loaded_data->exp = EXP_NaN; + loaded_data->tag = TW_NaN; + if ( !(sigh & 0x80000000) ) + { + /* NaNs have the ms bit set to 1. */ + /* This is therefore an Unsupported NaN data type. */ + /* This is non 80486 behaviour */ + /* This should generate an Invalid Operand exception + later, so we convert it to a SNaN */ + loaded_data->sigh = 0x80000000; + loaded_data->sigl = 0x00000001; + loaded_data->sign = SIGN_NEG; + return 1; + } + return 0; + } + + if ( !(sigh & 0x80000000) ) + { + /* Unsupported data type. */ + /* Valid numbers have the ms bit set to 1. */ + /* Unnormal. */ + /* Convert it for internal use. */ + /* This is non-80486 behaviour */ + /* This should generate an Invalid Operand exception + later, so we convert it to a SNaN */ + loaded_data->sigh = 0x80000000; + loaded_data->sigl = 0x00000001; + loaded_data->sign = SIGN_NEG; + loaded_data->exp = EXP_NaN; + loaded_data->tag = TW_NaN; + return 1; + } + return 0; +} + + +/* Get a double from user memory */ +int reg_load_double(double *dfloat, FPU_REG *loaded_data) +{ + int exp; + unsigned m64, l64; + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_READ, dfloat, 8); + m64 = get_fs_long(1 + (unsigned long *) dfloat); + l64 = get_fs_long((unsigned long *) dfloat); + RE_ENTRANT_CHECK_ON; + + if (m64 & 0x80000000) + loaded_data->sign = SIGN_NEG; + else + loaded_data->sign = SIGN_POS; + exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias; + m64 &= 0xfffff; + if (exp > DOUBLE_Emax) + { + /* Infinity or NaN */ + if ((m64 == 0) && (l64 == 0)) + { + /* +- infinity */ + loaded_data->sigh = 0x80000000; + loaded_data->sigl = 0x00000000; + loaded_data->exp = EXP_Infinity; + loaded_data->tag = TW_Infinity; + return 0; + } + else + { + /* Must be a signaling or quiet NaN */ + loaded_data->exp = EXP_NaN; + loaded_data->tag = TW_NaN; + loaded_data->sigh = (m64 << 11) | 0x80000000; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + return 0; /* The calling function must look for NaNs */ + } + } + else if ( exp < DOUBLE_Emin ) + { + /* Zero or de-normal */ + if ((m64 == 0) && (l64 == 0)) + { + /* Zero */ + int c = loaded_data->sign; + reg_move(&CONST_Z, loaded_data); + loaded_data->sign = c; + return 0; + } + else + { + /* De-normal */ + loaded_data->exp = DOUBLE_Emin + EXP_BIAS; + loaded_data->tag = TW_Valid; + loaded_data->sigh = m64 << 11; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + normalize_nuo(loaded_data); + return denormal_operand(); + } + } + else + { + loaded_data->exp = exp + EXP_BIAS; + loaded_data->tag = TW_Valid; + loaded_data->sigh = (m64 << 11) | 0x80000000; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + + return 0; + } +} + + +/* Get a float from user memory */ +int reg_load_single(float *single, FPU_REG *loaded_data) +{ + unsigned m32; + int exp; + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_READ, single, 4); + m32 = get_fs_long((unsigned long *) single); + RE_ENTRANT_CHECK_ON; + + if (m32 & 0x80000000) + loaded_data->sign = SIGN_NEG; + else + loaded_data->sign = SIGN_POS; + if (!(m32 & 0x7fffffff)) + { + /* Zero */ + int c = loaded_data->sign; + reg_move(&CONST_Z, loaded_data); + loaded_data->sign = c; + return 0; + } + exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias; + m32 = (m32 & 0x7fffff) << 8; + if ( exp < SINGLE_Emin ) + { + /* De-normals */ + loaded_data->exp = SINGLE_Emin + EXP_BIAS; + loaded_data->tag = TW_Valid; + loaded_data->sigh = m32; + loaded_data->sigl = 0; + normalize_nuo(loaded_data); + return denormal_operand(); + } + else if ( exp > SINGLE_Emax ) + { + /* Infinity or NaN */ + if ( m32 == 0 ) + { + /* +- infinity */ + loaded_data->sigh = 0x80000000; + loaded_data->sigl = 0x00000000; + loaded_data->exp = EXP_Infinity; + loaded_data->tag = TW_Infinity; + return 0; + } + else + { + /* Must be a signaling or quiet NaN */ + loaded_data->exp = EXP_NaN; + loaded_data->tag = TW_NaN; + loaded_data->sigh = m32 | 0x80000000; + loaded_data->sigl = 0; + return 0; /* The calling function must look for NaNs */ + } + } + else + { + loaded_data->exp = exp + EXP_BIAS; + loaded_data->sigh = m32 | 0x80000000; + loaded_data->sigl = 0; + loaded_data->tag = TW_Valid; + return 0; + } +} + + +/* Get a long long from user memory */ +void reg_load_int64(long long *_s, FPU_REG *loaded_data) +{ + int e; + long long s; + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_READ, _s, 8); + ((unsigned long *)&s)[0] = get_fs_long((unsigned long *) _s); + ((unsigned long *)&s)[1] = get_fs_long(1 + (unsigned long *) _s); + RE_ENTRANT_CHECK_ON; + + if (s == 0) + { reg_move(&CONST_Z, loaded_data); return; } + + if (s > 0) + loaded_data->sign = SIGN_POS; + else + { + s = -s; + loaded_data->sign = SIGN_NEG; + } + + e = EXP_BIAS + 63; + significand(loaded_data) = s; + loaded_data->exp = e; + loaded_data->tag = TW_Valid; + normalize_nuo(loaded_data); +} + + +/* Get a long from user memory */ +void reg_load_int32(long *_s, FPU_REG *loaded_data) +{ + long s; + int e; + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_READ, _s, 4); + s = (long)get_fs_long((unsigned long *) _s); + RE_ENTRANT_CHECK_ON; + + if (s == 0) + { reg_move(&CONST_Z, loaded_data); return; } + + if (s > 0) + loaded_data->sign = SIGN_POS; + else + { + s = -s; + loaded_data->sign = SIGN_NEG; + } + + e = EXP_BIAS + 31; + loaded_data->sigh = s; + loaded_data->sigl = 0; + loaded_data->exp = e; + loaded_data->tag = TW_Valid; + normalize_nuo(loaded_data); +} + + +/* Get a short from user memory */ +void reg_load_int16(short *_s, FPU_REG *loaded_data) +{ + int s, e; + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_READ, _s, 2); + /* Cast as short to get the sign extended. */ + s = (short)get_fs_word((unsigned short *) _s); + RE_ENTRANT_CHECK_ON; + + if (s == 0) + { reg_move(&CONST_Z, loaded_data); return; } + + if (s > 0) + loaded_data->sign = SIGN_POS; + else + { + s = -s; + loaded_data->sign = SIGN_NEG; + } + + e = EXP_BIAS + 15; + loaded_data->sigh = s << 16; + + loaded_data->sigl = 0; + loaded_data->exp = e; + loaded_data->tag = TW_Valid; + normalize_nuo(loaded_data); +} + + +/* Get a packed bcd array from user memory */ +void reg_load_bcd(char *s, FPU_REG *loaded_data) +{ + int pos; + unsigned char bcd; + long long l=0; + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_READ, s, 10); + RE_ENTRANT_CHECK_ON; + for ( pos = 8; pos >= 0; pos--) + { + l *= 10; + RE_ENTRANT_CHECK_OFF; + bcd = (unsigned char)get_fs_byte((unsigned char *) s+pos); + RE_ENTRANT_CHECK_ON; + l += bcd >> 4; + l *= 10; + l += bcd & 0x0f; + } + + RE_ENTRANT_CHECK_OFF; + loaded_data->sign = + ((unsigned char)get_fs_byte((unsigned char *) s+9)) & 0x80 ? + SIGN_NEG : SIGN_POS; + RE_ENTRANT_CHECK_ON; + + if (l == 0) + { + char sign = loaded_data->sign; + reg_move(&CONST_Z, loaded_data); + loaded_data->sign = sign; + } + else + { + significand(loaded_data) = l; + loaded_data->exp = EXP_BIAS + 63; + loaded_data->tag = TW_Valid; + normalize_nuo(loaded_data); + } +} + +/*===========================================================================*/ + +/* Put a long double into user memory */ +int reg_store_extended(long double *d, FPU_REG *st0_ptr) +{ + /* + The only exception raised by an attempt to store to an + extended format is the Invalid Stack exception, i.e. + attempting to store from an empty register. + */ + + if ( st0_ptr->tag != TW_Empty ) + { + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE, d, 10); + RE_ENTRANT_CHECK_ON; + write_to_extended(st0_ptr, (char *) d); + return 1; + } + + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if ( control_word & CW_Invalid ) + { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,d,10); + put_fs_long(0, (unsigned long *) d); + put_fs_long(0xc0000000, 1 + (unsigned long *) d); + put_fs_word(0xffff, 4 + (short *) d); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + +} + + +/* Put a double into user memory */ +int reg_store_double(double *dfloat, FPU_REG *st0_ptr) +{ + unsigned long l[2]; + unsigned long increment = 0; /* avoid gcc warnings */ + char st0_tag = st0_ptr->tag; + + if (st0_tag == TW_Valid) + { + int exp; + FPU_REG tmp; + + reg_move(st0_ptr, &tmp); + exp = tmp.exp - EXP_BIAS; + + if ( exp < DOUBLE_Emin ) /* It may be a denormal */ + { + int precision_loss; + + /* A denormal will always underflow. */ +#ifndef PECULIAR_486 + /* An 80486 is supposed to be able to generate + a denormal exception here, but... */ + if ( st0_ptr->exp <= EXP_UNDER ) + { + /* Underflow has priority. */ + if ( control_word & CW_Underflow ) + denormal_operand(); + } +#endif PECULIAR_486 + + tmp.exp += -DOUBLE_Emin + 52; /* largest exp to be 51 */ + + if ( (precision_loss = round_to_int(&tmp)) ) + { +#ifdef PECULIAR_486 + /* Did it round to a non-denormal ? */ + /* This behaviour might be regarded as peculiar, it appears + that the 80486 rounds to the dest precision, then + converts to decide underflow. */ + if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) && + (st0_ptr->sigl & 0x000007ff)) ) +#endif PECULIAR_486 + { + EXCEPTION(EX_Underflow); + /* This is a special case: see sec 16.2.5.1 of + the 80486 book */ + if ( !(control_word & CW_Underflow) ) + return 0; + } + EXCEPTION(precision_loss); + if ( !(control_word & CW_Precision) ) + return 0; + } + l[0] = tmp.sigl; + l[1] = tmp.sigh; + } + else + { + if ( tmp.sigl & 0x000007ff ) + { + switch (control_word & CW_RC) + { + case RC_RND: + /* Rounding can get a little messy.. */ + increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */ + ((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */ + break; + case RC_DOWN: /* towards -infinity */ + increment = (tmp.sign == SIGN_POS) ? 0 : tmp.sigl & 0x7ff; + break; + case RC_UP: /* towards +infinity */ + increment = (tmp.sign == SIGN_POS) ? tmp.sigl & 0x7ff : 0; + break; + case RC_CHOP: + increment = 0; + break; + } + + /* Truncate the mantissa */ + tmp.sigl &= 0xfffff800; + + if ( increment ) + { + set_precision_flag_up(); + + if ( tmp.sigl >= 0xfffff800 ) + { + /* the sigl part overflows */ + if ( tmp.sigh == 0xffffffff ) + { + /* The sigh part overflows */ + tmp.sigh = 0x80000000; + exp++; + if (exp >= EXP_OVER) + goto overflow; + } + else + { + tmp.sigh ++; + } + tmp.sigl = 0x00000000; + } + else + { + /* We only need to increment sigl */ + tmp.sigl += 0x00000800; + } + } + else + set_precision_flag_down(); + } + + l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21); + l[1] = ((tmp.sigh >> 11) & 0xfffff); + + if ( exp > DOUBLE_Emax ) + { + overflow: + EXCEPTION(EX_Overflow); + if ( !(control_word & CW_Overflow) ) + return 0; + set_precision_flag_up(); + if ( !(control_word & CW_Precision) ) + return 0; + + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + /* Overflow to infinity */ + l[0] = 0x00000000; /* Set to */ + l[1] = 0x7ff00000; /* + INF */ + } + else + { + /* Add the exponent */ + l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20); + } + } + } + else if (st0_tag == TW_Zero) + { + /* Number is zero */ + l[0] = 0; + l[1] = 0; + } + else if (st0_tag == TW_Infinity) + { + l[0] = 0; + l[1] = 0x7ff00000; + } + else if (st0_tag == TW_NaN) + { + /* See if we can get a valid NaN from the FPU_REG */ + l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21); + l[1] = ((st0_ptr->sigh >> 11) & 0xfffff); + if ( !(st0_ptr->sigh & 0x40000000) ) + { + /* It is a signalling NaN */ + EXCEPTION(EX_Invalid); + if ( !(control_word & CW_Invalid) ) + return 0; + l[1] |= (0x40000000 >> 11); + } + l[1] |= 0x7ff00000; + } + else if ( st0_tag == TW_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if ( control_word & CW_Invalid ) + { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,(void *)dfloat,8); + put_fs_long(0, (unsigned long *) dfloat); + put_fs_long(0xfff80000, 1 + (unsigned long *) dfloat); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + } + if ( st0_ptr->sign ) + l[1] |= 0x80000000; + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,(void *)dfloat,8); + put_fs_long(l[0], (unsigned long *)dfloat); + put_fs_long(l[1], 1 + (unsigned long *)dfloat); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a float into user memory */ +int reg_store_single(float *single, FPU_REG *st0_ptr) +{ + long templ; + unsigned long increment = 0; /* avoid gcc warnings */ + char st0_tag = st0_ptr->tag; + + if (st0_tag == TW_Valid) + { + int exp; + FPU_REG tmp; + + reg_move(st0_ptr, &tmp); + exp = tmp.exp - EXP_BIAS; + + if ( exp < SINGLE_Emin ) + { + int precision_loss; + + /* A denormal will always underflow. */ +#ifndef PECULIAR_486 + /* An 80486 is supposed to be able to generate + a denormal exception here, but... */ + if ( st0_ptr->exp <= EXP_UNDER ) + { + /* Underflow has priority. */ + if ( control_word & CW_Underflow ) + denormal_operand(); + } +#endif PECULIAR_486 + + tmp.exp += -SINGLE_Emin + 23; /* largest exp to be 22 */ + + if ( (precision_loss = round_to_int(&tmp)) ) + { +#ifdef PECULIAR_486 + /* Did it round to a non-denormal ? */ + /* This behaviour might be regarded as peculiar, it appears + that the 80486 rounds to the dest precision, then + converts to decide underflow. */ + if ( !((tmp.sigl == 0x00800000) && + ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) ) +#endif PECULIAR_486 + { + EXCEPTION(EX_Underflow); + /* This is a special case: see sec 16.2.5.1 of + the 80486 book */ + if ( !(control_word & EX_Underflow) ) + return 0; + } + EXCEPTION(precision_loss); + if ( !(control_word & EX_Precision) ) + return 0; + } + templ = tmp.sigl; + } + else + { + if ( tmp.sigl | (tmp.sigh & 0x000000ff) ) + { + unsigned long sigh = tmp.sigh; + unsigned long sigl = tmp.sigl; + + switch (control_word & CW_RC) + { + case RC_RND: + increment = ((sigh & 0xff) > 0x80) /* more than half */ + || (((sigh & 0xff) == 0x80) && sigl) /* more than half */ + || ((sigh & 0x180) == 0x180); /* round to even */ + break; + case RC_DOWN: /* towards -infinity */ + increment = (tmp.sign == SIGN_POS) + ? 0 : (sigl | (sigh & 0xff)); + break; + case RC_UP: /* towards +infinity */ + increment = (tmp.sign == SIGN_POS) + ? (sigl | (sigh & 0xff)) : 0; + break; + case RC_CHOP: + increment = 0; + break; + } + + /* Truncate part of the mantissa */ + tmp.sigl = 0; + + if (increment) + { + set_precision_flag_up(); + + if ( sigh >= 0xffffff00 ) + { + /* The sigh part overflows */ + tmp.sigh = 0x80000000; + exp++; + if ( exp >= EXP_OVER ) + goto overflow; + } + else + { + tmp.sigh &= 0xffffff00; + tmp.sigh += 0x100; + } + } + else + { + set_precision_flag_down(); + tmp.sigh &= 0xffffff00; /* Finish the truncation */ + } + } + + templ = (tmp.sigh >> 8) & 0x007fffff; + + if ( exp > SINGLE_Emax ) + { + overflow: + EXCEPTION(EX_Overflow); + if ( !(control_word & CW_Overflow) ) + return 0; + set_precision_flag_up(); + if ( !(control_word & CW_Precision) ) + return 0; + + /* This is a special case: see sec 16.2.5.1 of the 80486 book. */ + /* Masked response is overflow to infinity. */ + templ = 0x7f800000; + } + else + templ |= ((exp+SINGLE_Ebias) & 0xff) << 23; + } + } + else if (st0_tag == TW_Zero) + { + templ = 0; + } + else if (st0_tag == TW_Infinity) + { + templ = 0x7f800000; + } + else if (st0_tag == TW_NaN) + { + /* See if we can get a valid NaN from the FPU_REG */ + templ = st0_ptr->sigh >> 8; + if ( !(st0_ptr->sigh & 0x40000000) ) + { + /* It is a signalling NaN */ + EXCEPTION(EX_Invalid); + if ( !(control_word & CW_Invalid) ) + return 0; + templ |= (0x40000000 >> 8); + } + templ |= 0x7f800000; + } + else if ( st0_tag == TW_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if ( control_word & EX_Invalid ) + { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,(void *)single,4); + put_fs_long(0xffc00000, (unsigned long *) single); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL|0x163); + return 0; + } +#endif + if (st0_ptr->sign) + templ |= 0x80000000; + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,(void *)single,4); + put_fs_long(templ,(unsigned long *) single); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a long long into user memory */ +int reg_store_int64(long long *d, FPU_REG *st0_ptr) +{ + FPU_REG t; + long long tll; + int precision_loss; + char st0_tag = st0_ptr->tag; + + if ( st0_tag == TW_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + else if ( (st0_tag == TW_Infinity) || + (st0_tag == TW_NaN) ) + { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + + reg_move(st0_ptr, &t); + precision_loss = round_to_int(&t); + ((long *)&tll)[0] = t.sigl; + ((long *)&tll)[1] = t.sigh; + if ( (precision_loss == 1) || + ((t.sigh & 0x80000000) && + !((t.sigh == 0x80000000) && (t.sigl == 0) && + (t.sign == SIGN_NEG))) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & EX_Invalid ) + { + /* Produce something like QNaN "indefinite" */ + tll = 0x8000000000000000LL; + } + else + return 0; + } + else + { + if ( precision_loss ) + set_precision_flag(precision_loss); + if ( t.sign ) + tll = - tll; + } + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,(void *)d,8); + put_fs_long(((long *)&tll)[0],(unsigned long *) d); + put_fs_long(((long *)&tll)[1],1 + (unsigned long *) d); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a long into user memory */ +int reg_store_int32(long *d, FPU_REG *st0_ptr) +{ + FPU_REG t; + int precision_loss; + char st0_tag = st0_ptr->tag; + + if ( st0_tag == TW_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + else if ( (st0_tag == TW_Infinity) || + (st0_tag == TW_NaN) ) + { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + + reg_move(st0_ptr, &t); + precision_loss = round_to_int(&t); + if (t.sigh || + ((t.sigl & 0x80000000) && + !((t.sigl == 0x80000000) && (t.sign == SIGN_NEG))) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & EX_Invalid ) + { + /* Produce something like QNaN "indefinite" */ + t.sigl = 0x80000000; + } + else + return 0; + } + else + { + if ( precision_loss ) + set_precision_flag(precision_loss); + if ( t.sign ) + t.sigl = -(long)t.sigl; + } + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,d,4); + put_fs_long(t.sigl, (unsigned long *) d); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a short into user memory */ +int reg_store_int16(short *d, FPU_REG *st0_ptr) +{ + FPU_REG t; + int precision_loss; + char st0_tag = st0_ptr->tag; + + if ( st0_tag == TW_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + else if ( (st0_tag == TW_Infinity) || + (st0_tag == TW_NaN) ) + { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + + reg_move(st0_ptr, &t); + precision_loss = round_to_int(&t); + if (t.sigh || + ((t.sigl & 0xffff8000) && + !((t.sigl == 0x8000) && (t.sign == SIGN_NEG))) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & EX_Invalid ) + { + /* Produce something like QNaN "indefinite" */ + t.sigl = 0x8000; + } + else + return 0; + } + else + { + if ( precision_loss ) + set_precision_flag(precision_loss); + if ( t.sign ) + t.sigl = -t.sigl; + } + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,d,2); + put_fs_word((short)t.sigl,(short *) d); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a packed bcd array into user memory */ +int reg_store_bcd(char *d, FPU_REG *st0_ptr) +{ + FPU_REG t; + unsigned long long ll; + unsigned char b; + int i, precision_loss; + unsigned char sign = (st0_ptr->sign == SIGN_NEG) ? 0x80 : 0; + char st0_tag = st0_ptr->tag; + + if ( st0_tag == TW_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + + reg_move(st0_ptr, &t); + precision_loss = round_to_int(&t); + ll = significand(&t); + + /* Check for overflow, by comparing with 999999999999999999 decimal. */ + if ( (t.sigh > 0x0de0b6b3) || + ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & CW_Invalid ) + { + /* Produce the QNaN "indefinite" */ + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,d,10); + for ( i = 0; i < 7; i++) + put_fs_byte(0, (unsigned char *) d+i); /* These bytes "undefined" */ + put_fs_byte(0xc0, (unsigned char *) d+7); /* This byte "undefined" */ + put_fs_byte(0xff, (unsigned char *) d+8); + put_fs_byte(0xff, (unsigned char *) d+9); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + } + else if ( precision_loss ) + { + /* Precision loss doesn't stop the data transfer */ + set_precision_flag(precision_loss); + } + + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,d,10); + RE_ENTRANT_CHECK_ON; + for ( i = 0; i < 9; i++) + { + b = div_small(&ll, 10); + b |= (div_small(&ll, 10)) << 4; + RE_ENTRANT_CHECK_OFF; + put_fs_byte(b,(unsigned char *) d+i); + RE_ENTRANT_CHECK_ON; + } + RE_ENTRANT_CHECK_OFF; + put_fs_byte(sign,(unsigned char *) d+9); + RE_ENTRANT_CHECK_ON; + + return 1; +} + +/*===========================================================================*/ + +/* r gets mangled such that sig is int, sign: + it is NOT normalized */ +/* The return value (in eax) is zero if the result is exact, + if bits are changed due to rounding, truncation, etc, then + a non-zero value is returned */ +/* Overflow is signalled by a non-zero return value (in eax). + In the case of overflow, the returned significand always has the + largest possible value */ +int round_to_int(FPU_REG *r) +{ + char very_big; + unsigned eax; + + if (r->tag == TW_Zero) + { + /* Make sure that zero is returned */ + significand(r) = 0; + return 0; /* o.k. */ + } + + if (r->exp > EXP_BIAS + 63) + { + r->sigl = r->sigh = ~0; /* The largest representable number */ + return 1; /* overflow */ + } + + eax = shrxs(&r->sigl, EXP_BIAS + 63 - r->exp); + very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */ +#define half_or_more (eax & 0x80000000) +#define frac_part (eax) +#define more_than_half ((eax & 0x80000001) == 0x80000001) + switch (control_word & CW_RC) + { + case RC_RND: + if ( more_than_half /* nearest */ + || (half_or_more && (r->sigl & 1)) ) /* odd -> even */ + { + if ( very_big ) return 1; /* overflow */ + significand(r) ++; + return PRECISION_LOST_UP; + } + break; + case RC_DOWN: + if (frac_part && r->sign) + { + if ( very_big ) return 1; /* overflow */ + significand(r) ++; + return PRECISION_LOST_UP; + } + break; + case RC_UP: + if (frac_part && !r->sign) + { + if ( very_big ) return 1; /* overflow */ + significand(r) ++; + return PRECISION_LOST_UP; + } + break; + case RC_CHOP: + break; + } + + return eax ? PRECISION_LOST_DOWN : 0; + +} + +/*===========================================================================*/ + +char *fldenv(fpu_addr_modes addr_modes, char *s) +{ + unsigned short tag_word = 0; + unsigned char tag; + int i; + + if ( (addr_modes.default_mode == VM86) || + ((addr_modes.default_mode == PM16) + ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) + { + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_READ, s, 0x0e); + control_word = get_fs_word((unsigned short *) s); + partial_status = get_fs_word((unsigned short *) (s+2)); + tag_word = get_fs_word((unsigned short *) (s+4)); + instruction_address.offset = get_fs_word((unsigned short *) (s+6)); + instruction_address.selector = get_fs_word((unsigned short *) (s+8)); + operand_address.offset = get_fs_word((unsigned short *) (s+0x0a)); + operand_address.selector = get_fs_word((unsigned short *) (s+0x0c)); + RE_ENTRANT_CHECK_ON; + s += 0x0e; + if ( addr_modes.default_mode == VM86 ) + { + instruction_address.offset + += (instruction_address.selector & 0xf000) << 4; + operand_address.offset += (operand_address.selector & 0xf000) << 4; + } + } + else + { + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_READ, s, 0x1c); + control_word = get_fs_word((unsigned short *) s); + partial_status = get_fs_word((unsigned short *) (s+4)); + tag_word = get_fs_word((unsigned short *) (s+8)); + instruction_address.offset = get_fs_long((unsigned long *) (s+0x0c)); + instruction_address.selector = get_fs_word((unsigned short *) (s+0x10)); + instruction_address.opcode = get_fs_word((unsigned short *) (s+0x12)); + operand_address.offset = get_fs_long((unsigned long *) (s+0x14)); + operand_address.selector = get_fs_long((unsigned long *) (s+0x18)); + RE_ENTRANT_CHECK_ON; + s += 0x1c; + } + +#ifdef PECULIAR_486 + control_word &= ~0xe080; +#endif PECULIAR_486 + + top = (partial_status >> SW_Top_Shift) & 7; + + if ( partial_status & ~control_word & CW_Exceptions ) + partial_status |= (SW_Summary | SW_Backward); + else + partial_status &= ~(SW_Summary | SW_Backward); + + for ( i = 0; i < 8; i++ ) + { + tag = tag_word & 3; + tag_word >>= 2; + + if ( tag == 3 ) + /* New tag is empty. Accept it */ + regs[i].tag = TW_Empty; + else if ( regs[i].tag == TW_Empty ) + { + /* Old tag is empty and new tag is not empty. New tag is determined + by old reg contents */ + if ( regs[i].exp == EXP_BIAS - EXTENDED_Ebias ) + { + if ( !(regs[i].sigl | regs[i].sigh) ) + regs[i].tag = TW_Zero; + else + regs[i].tag = TW_Valid; + } + else if ( regs[i].exp == 0x7fff + EXP_BIAS - EXTENDED_Ebias ) + { + if ( !((regs[i].sigh & ~0x80000000) | regs[i].sigl) ) + regs[i].tag = TW_Infinity; + else + regs[i].tag = TW_NaN; + } + else + regs[i].tag = TW_Valid; + } + /* Else old tag is not empty and new tag is not empty. Old tag + remains correct */ + } + + return s; +} + + +void frstor(fpu_addr_modes addr_modes, char *data_address) +{ + int i, stnr; + unsigned char tag; + char *s = fldenv(addr_modes, data_address); + + for ( i = 0; i < 8; i++ ) + { + /* Load each register. */ + stnr = (i+top) & 7; + tag = regs[stnr].tag; /* Derived from the fldenv() loaded tag word. */ + reg_load_extended((long double *)(s+i*10), ®s[stnr]); + if ( tag == TW_Empty ) /* The loaded data over-rides all other cases. */ + regs[stnr].tag = tag; + } + +} + + +unsigned short tag_word(void) +{ + unsigned short word = 0; + unsigned char tag; + int i; + + for ( i = 7; i >= 0; i-- ) + { + switch ( tag = regs[i].tag ) + { + case TW_Valid: + if ( regs[i].exp <= (EXP_BIAS - EXTENDED_Ebias) ) + tag = 2; + break; + case TW_Infinity: + case TW_NaN: + tag = 2; + break; + case TW_Empty: + tag = 3; + break; + /* TW_Zero already has the correct value */ + } + word <<= 2; + word |= tag; + } + return word; +} + + +char *fstenv(fpu_addr_modes addr_modes, char *d) +{ + if ( (addr_modes.default_mode == VM86) || + ((addr_modes.default_mode == PM16) + ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) + { + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,d,14); +#ifdef PECULIAR_486 + put_fs_long(control_word & ~0xe080, (unsigned short *) d); +#else + put_fs_word(control_word, (unsigned short *) d); +#endif PECULIAR_486 + put_fs_word(status_word(), (unsigned short *) (d+2)); + put_fs_word(tag_word(), (unsigned short *) (d+4)); + put_fs_word(instruction_address.offset, (unsigned short *) (d+6)); + put_fs_word(operand_address.offset, (unsigned short *) (d+0x0a)); + if ( addr_modes.default_mode == VM86 ) + { + put_fs_word((instruction_address.offset & 0xf0000) >> 4, + (unsigned short *) (d+8)); + put_fs_word((operand_address.offset & 0xf0000) >> 4, + (unsigned short *) (d+0x0c)); + } + else + { + put_fs_word(instruction_address.selector, (unsigned short *) (d+8)); + put_fs_word(operand_address.selector, (unsigned short *) (d+0x0c)); + } + RE_ENTRANT_CHECK_ON; + d += 0x0e; + } + else + { + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,d,28); +#ifdef PECULIAR_486 + /* An 80486 sets all the reserved bits to 1. */ + put_fs_long(0xffff0040 | (control_word & ~0xe080), (unsigned long *) d); + put_fs_long(0xffff0000 | status_word(), (unsigned long *) (d+4)); + put_fs_long(0xffff0000 | tag_word(), (unsigned long *) (d+8)); +#else + put_fs_word(control_word, (unsigned short *) d); + put_fs_word(status_word(), (unsigned short *) (d+4)); + put_fs_word(tag_word(), (unsigned short *) (d+8)); +#endif PECULIAR_486 + put_fs_long(instruction_address.offset, (unsigned long *) (d+0x0c)); + put_fs_word(instruction_address.selector, (unsigned short *) (d+0x10)); + put_fs_word(instruction_address.opcode, (unsigned short *) (d+0x12)); + put_fs_long(operand_address.offset, (unsigned long *) (d+0x14)); +#ifdef PECULIAR_486 + /* An 80486 sets all the reserved bits to 1. */ + put_fs_word(operand_address.selector, (unsigned short *) (d+0x18)); + put_fs_word(0xffff, (unsigned short *) (d+0x1a)); +#else + put_fs_long(operand_address.selector, (unsigned long *) (d+0x18)); +#endif PECULIAR_486 + RE_ENTRANT_CHECK_ON; + d += 0x1c; + } + + control_word |= CW_Exceptions; + partial_status &= ~(SW_Summary | SW_Backward); + + return d; +} + + +void fsave(fpu_addr_modes addr_modes, char *data_address) +{ + char *d; + int i; + + d = fstenv(addr_modes, data_address); + RE_ENTRANT_CHECK_OFF; + FPU_verify_area(VERIFY_WRITE,d,80); + RE_ENTRANT_CHECK_ON; + for ( i = 0; i < 8; i++ ) + write_to_extended(®s[(top + i) & 7], d + 10 * i); + + finit(); + +} + +/*===========================================================================*/ + +/* + A call to this function must be preceded by a call to + FPU_verify_area() to verify access to the 10 bytes at d + */ +static void write_to_extended(FPU_REG *rp, char *d) +{ + long e; + FPU_REG tmp; + + e = rp->exp - EXP_BIAS + EXTENDED_Ebias; + +#ifdef PARANOID + switch ( rp->tag ) + { + case TW_Zero: + if ( rp->sigh | rp->sigl | e ) + EXCEPTION(EX_INTERNAL | 0x160); + break; + case TW_Infinity: + case TW_NaN: + if ( (e ^ 0x7fff) | !(rp->sigh & 0x80000000) ) + EXCEPTION(EX_INTERNAL | 0x161); + break; + default: + if (e > 0x7fff || e < -63) + EXCEPTION(EX_INTERNAL | 0x162); + } +#endif PARANOID + + /* + All numbers except denormals are stored internally in a + format which is compatible with the extended real number + format. + */ + if ( e > 0 ) + { + /* just copy the reg */ + RE_ENTRANT_CHECK_OFF; + put_fs_long(rp->sigl, (unsigned long *) d); + put_fs_long(rp->sigh, (unsigned long *) (d + 4)); + RE_ENTRANT_CHECK_ON; + } + else + { + /* + The number is a de-normal stored as a normal using our + extra exponent range, or is Zero. + Convert it back to a de-normal, or leave it as Zero. + */ + reg_move(rp, &tmp); + tmp.exp += -EXTENDED_Emin + 63; /* largest exp to be 63 */ + round_to_int(&tmp); + e = 0; + RE_ENTRANT_CHECK_OFF; + put_fs_long(tmp.sigl, (unsigned long *) d); + put_fs_long(tmp.sigh, (unsigned long *) (d + 4)); + RE_ENTRANT_CHECK_ON; + } + e |= rp->sign == SIGN_POS ? 0 : 0x8000; + RE_ENTRANT_CHECK_OFF; + put_fs_word(e, (unsigned short *) (d + 8)); + RE_ENTRANT_CHECK_ON; +} diff --git a/arch/i386/math-emu/reg_mul.c b/arch/i386/math-emu/reg_mul.c new file mode 100644 index 000000000..75246187b --- /dev/null +++ b/arch/i386/math-emu/reg_mul.c @@ -0,0 +1,105 @@ +/*---------------------------------------------------------------------------+ + | reg_mul.c | + | | + | Multiply one FPU_REG by another, put the result in a destination FPU_REG. | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The destination may be any FPU_REG, including one of the source FPU_REGs. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" + + +/* This routine must be called with non-empty source registers */ +int reg_mul(FPU_REG const *a, FPU_REG const *b, + FPU_REG *dest, unsigned int control_w) +{ + char saved_sign = dest->sign; + char sign = (a->sign ^ b->sign); + + if (!(a->tag | b->tag)) + { + /* Both regs Valid, this should be the most common case. */ + dest->sign = sign; + if ( reg_u_mul(a, b, dest, control_w) ) + { + dest->sign = saved_sign; + return 1; + } + return 0; + } + else if ((a->tag <= TW_Zero) && (b->tag <= TW_Zero)) + { +#ifdef DENORM_OPERAND + if ( ((b->tag == TW_Valid) && (b->exp <= EXP_UNDER)) || + ((a->tag == TW_Valid) && (a->exp <= EXP_UNDER)) ) + { + if ( denormal_operand() ) return 1; + } +#endif DENORM_OPERAND + /* Must have either both arguments == zero, or + one valid and the other zero. + The result is therefore zero. */ + reg_move(&CONST_Z, dest); + /* The 80486 book says that the answer is +0, but a real + 80486 behaves this way. + IEEE-754 apparently says it should be this way. */ + dest->sign = sign; + return 0; + } + else + { + /* Must have infinities, NaNs, etc */ + if ( (a->tag == TW_NaN) || (b->tag == TW_NaN) ) + { return real_2op_NaN(a, b, dest); } + else if (a->tag == TW_Infinity) + { + if (b->tag == TW_Zero) + { return arith_invalid(dest); } /* Zero*Infinity is invalid */ + else + { +#ifdef DENORM_OPERAND + if ( (b->tag == TW_Valid) && (b->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(a, dest); + dest->sign = sign; + } + return 0; + } + else if (b->tag == TW_Infinity) + { + if (a->tag == TW_Zero) + { return arith_invalid(dest); } /* Zero*Infinity is invalid */ + else + { +#ifdef DENORM_OPERAND + if ( (a->tag == TW_Valid) && (a->exp <= EXP_UNDER) && + denormal_operand() ) + return 1; +#endif DENORM_OPERAND + reg_move(b, dest); + dest->sign = sign; + } + return 0; + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL|0x102); + return 1; + } +#endif PARANOID + } +} diff --git a/arch/i386/math-emu/reg_norm.S b/arch/i386/math-emu/reg_norm.S new file mode 100644 index 000000000..9b7a9d77d --- /dev/null +++ b/arch/i386/math-emu/reg_norm.S @@ -0,0 +1,150 @@ +/*---------------------------------------------------------------------------+ + | reg_norm.S | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | Normalize the value in a FPU_REG. | + | | + | Call from C as: | + | void normalize(FPU_REG *n) | + | | + | void normalize_nuo(FPU_REG *n) | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_asm.h" + + +.text + + .align 2,144 +.globl _normalize + +_normalize: + pushl %ebp + movl %esp,%ebp + pushl %ebx + + movl PARAM1,%ebx + +#ifdef PARANOID + cmpb TW_Valid,TAG(%ebx) + je L_ok + + pushl $0x220 + call _exception + addl $4,%esp + +L_ok: +#endif PARANOID + + movl SIGH(%ebx),%edx + movl SIGL(%ebx),%eax + + orl %edx,%edx /* ms bits */ + js L_done /* Already normalized */ + jnz L_shift_1 /* Shift left 1 - 31 bits */ + + orl %eax,%eax + jz L_zero /* The contents are zero */ + + movl %eax,%edx + xorl %eax,%eax + subl $32,EXP(%ebx) /* This can cause an underflow */ + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%eax,%edx + shl %cl,%eax + subl %ecx,EXP(%ebx) /* This can cause an underflow */ + + movl %edx,SIGH(%ebx) + movl %eax,SIGL(%ebx) + +L_done: + cmpl EXP_OVER,EXP(%ebx) + jge L_overflow + + cmpl EXP_UNDER,EXP(%ebx) + jle L_underflow + +L_exit: + popl %ebx + leave + ret + + +L_zero: + movl EXP_UNDER,EXP(%ebx) + movb TW_Zero,TAG(%ebx) + jmp L_exit + +L_underflow: + push %ebx + call _arith_underflow + pop %ebx + jmp L_exit + +L_overflow: + push %ebx + call _arith_overflow + pop %ebx + jmp L_exit + + + +/* Normalise without reporting underflow or overflow */ + .align 2,144 +.globl _normalize_nuo + +_normalize_nuo: + pushl %ebp + movl %esp,%ebp + pushl %ebx + + movl PARAM1,%ebx + +#ifdef PARANOID + cmpb TW_Valid,TAG(%ebx) + je L_ok_nuo + + pushl $0x221 + call _exception + addl $4,%esp + +L_ok_nuo: +#endif PARANOID + + movl SIGH(%ebx),%edx + movl SIGL(%ebx),%eax + + orl %edx,%edx /* ms bits */ + js L_exit /* Already normalized */ + jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */ + + orl %eax,%eax + jz L_zero /* The contents are zero */ + + movl %eax,%edx + xorl %eax,%eax + subl $32,EXP(%ebx) /* This can cause an underflow */ + +/* We need to shift left by 1 - 31 bits */ +L_nuo_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%eax,%edx + shl %cl,%eax + subl %ecx,EXP(%ebx) /* This can cause an underflow */ + + movl %edx,SIGH(%ebx) + movl %eax,SIGL(%ebx) + jmp L_exit + + diff --git a/arch/i386/math-emu/reg_round.S b/arch/i386/math-emu/reg_round.S new file mode 100644 index 000000000..bd8a40dc4 --- /dev/null +++ b/arch/i386/math-emu/reg_round.S @@ -0,0 +1,701 @@ + .file "reg_round.S" +/*---------------------------------------------------------------------------+ + | reg_round.S | + | | + | Rounding/truncation/etc for FPU basic arithmetic functions. | + | | + | Copyright (C) 1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | This code has four possible entry points. | + | The following must be entered by a jmp instruction: | + | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. | + | | + | The _round_reg entry point is intended to be used by C code. | + | From C, call as: | + | void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) | + | | + | For correct "up" and "down" rounding, the argument must have the correct | + | sign. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Four entry points. | + | | + | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: | + | %eax:%ebx 64 bit significand | + | %edx 32 bit extension of the significand | + | %edi pointer to an FPU_REG for the result to be stored | + | stack calling function must have set up a C stack frame and | + | pushed %esi, %edi, and %ebx | + | | + | Needed just for the fpu_reg_round_sqrt entry point: | + | %cx A control word in the same format as the FPU control word. | + | Otherwise, PARAM4 must give such a value. | + | | + | | + | The significand and its extension are assumed to be exact in the | + | following sense: | + | If the significand by itself is the exact result then the significand | + | extension (%edx) must contain 0, otherwise the significand extension | + | must be non-zero. | + | If the significand extension is non-zero then the significand is | + | smaller than the magnitude of the correct exact result by an amount | + | greater than zero and less than one ls bit of the significand. | + | The significand extension is only required to have three possible | + | non-zero values: | + | less than 0x80000000 <=> the significand is less than 1/2 an ls | + | bit smaller than the magnitude of the | + | true exact result. | + | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | + | smaller than the magnitude of the true | + | exact result. | + | greater than 0x80000000 <=> the significand is more than 1/2 an ls | + | bit smaller than the magnitude of the | + | true exact result. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The code in this module has become quite complex, but it should handle | + | all of the FPU flags which are set at this stage of the basic arithmetic | + | computations. | + | There are a few rare cases where the results are not set identically to | + | a real FPU. These require a bit more thought because at this stage the | + | results of the code here appear to be more consistent... | + | This may be changed in a future version. | + +---------------------------------------------------------------------------*/ + + +#include "fpu_asm.h" +#include "exception.h" +#include "control_w.h" + +/* Flags for FPU_bits_lost */ +#define LOST_DOWN $1 +#define LOST_UP $2 + +/* Flags for FPU_denormal */ +#define DENORMAL $1 +#define UNMASKED_UNDERFLOW $2 + + +#ifndef NON_REENTRANT_FPU +/* Make the code re-entrant by putting + local storage on the stack: */ +#define FPU_bits_lost (%esp) +#define FPU_denormal 1(%esp) + +#else +/* Not re-entrant, so we can gain speed by putting + local storage in a static area: */ +.data + .align 2,0 +FPU_bits_lost: + .byte 0 +FPU_denormal: + .byte 0 +#endif NON_REENTRANT_FPU + + +.text + .align 2,144 +.globl fpu_reg_round +.globl fpu_reg_round_sqrt +.globl fpu_Arith_exit +.globl _round_reg + +/* Entry point when called from C */ +_round_reg: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%edi + movl SIGH(%edi),%eax + movl SIGL(%edi),%ebx + movl PARAM2,%edx + movl PARAM3,%ecx + jmp fpu_reg_round_sqrt + +fpu_reg_round: /* Normal entry point */ + movl PARAM4,%ecx + +fpu_reg_round_sqrt: /* Entry point from wm_sqrt.S */ + +#ifndef NON_REENTRANT_FPU + pushl %ebx /* adjust the stack pointer */ +#endif NON_REENTRANT_FPU + +#ifdef PARANOID +/* Cannot use this here yet */ +/* orl %eax,%eax */ +/* jns L_entry_bugged */ +#endif PARANOID + + cmpl EXP_UNDER,EXP(%edi) + jle xMake_denorm /* The number is a de-normal */ + + movb $0,FPU_denormal /* 0 -> not a de-normal */ + +xDenorm_done: + movb $0,FPU_bits_lost /* No bits yet lost in rounding */ + + movl %ecx,%esi + andl CW_PC,%ecx + cmpl PR_64_BITS,%ecx + je LRound_To_64 + + cmpl PR_53_BITS,%ecx + je LRound_To_53 + + cmpl PR_24_BITS,%ecx + je LRound_To_24 + +#ifdef PECULIAR_486 +/* With the precision control bits set to 01 "(reserved)", a real 80486 + behaves as if the precision control bits were set to 11 "64 bits" */ + cmpl PR_RESERVED_BITS,%ecx + je LRound_To_64 +#ifdef PARANOID + jmp L_bugged_denorm_486 +#endif PARANOID +#else +#ifdef PARANOID + jmp L_bugged_denorm /* There is no bug, just a bad control word */ +#endif PARANOID +#endif PECULIAR_486 + + +/* Round etc to 24 bit precision */ +LRound_To_24: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_24 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_24 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_24 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_24 + +#ifdef PARANOID + jmp L_bugged_round24 +#endif PARANOID + +LUp_24: + cmpb SIGN_POS,SIGN(%edi) + jne LCheck_truncate_24 /* If negative then up==truncate */ + + jmp LCheck_24_round_up + +LDown_24: + cmpb SIGN_POS,SIGN(%edi) + je LCheck_truncate_24 /* If positive then down==truncate */ + +LCheck_24_round_up: + movl %eax,%ecx + andl $0x000000ff,%ecx + orl %ebx,%ecx + orl %edx,%ecx + jnz LDo_24_round_up + jmp LRe_normalise + +LRound_nearest_24: + /* Do rounding of the 24th bit if needed (nearest or even) */ + movl %eax,%ecx + andl $0x000000ff,%ecx + cmpl $0x00000080,%ecx + jc LCheck_truncate_24 /* less than half, no increment needed */ + + jne LGreater_Half_24 /* greater than half, increment needed */ + + /* Possibly half, we need to check the ls bits */ + orl %ebx,%ebx + jnz LGreater_Half_24 /* greater than half, increment needed */ + + orl %edx,%edx + jnz LGreater_Half_24 /* greater than half, increment needed */ + + /* Exactly half, increment only if 24th bit is 1 (round to even) */ + testl $0x00000100,%eax + jz LDo_truncate_24 + +LGreater_Half_24: /* Rounding: increment at the 24th bit */ +LDo_24_round_up: + andl $0xffffff00,%eax /* Truncate to 24 bits */ + xorl %ebx,%ebx + movb LOST_UP,FPU_bits_lost + addl $0x00000100,%eax + jmp LCheck_Round_Overflow + +LCheck_truncate_24: + movl %eax,%ecx + andl $0x000000ff,%ecx + orl %ebx,%ecx + orl %edx,%ecx + jz LRe_normalise /* No truncation needed */ + +LDo_truncate_24: + andl $0xffffff00,%eax /* Truncate to 24 bits */ + xorl %ebx,%ebx + movb LOST_DOWN,FPU_bits_lost + jmp LRe_normalise + + +/* Round etc to 53 bit precision */ +LRound_To_53: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_53 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_53 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_53 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_53 + +#ifdef PARANOID + jmp L_bugged_round53 +#endif PARANOID + +LUp_53: + cmpb SIGN_POS,SIGN(%edi) + jne LCheck_truncate_53 /* If negative then up==truncate */ + + jmp LCheck_53_round_up + +LDown_53: + cmpb SIGN_POS,SIGN(%edi) + je LCheck_truncate_53 /* If positive then down==truncate */ + +LCheck_53_round_up: + movl %ebx,%ecx + andl $0x000007ff,%ecx + orl %edx,%ecx + jnz LDo_53_round_up + jmp LRe_normalise + +LRound_nearest_53: + /* Do rounding of the 53rd bit if needed (nearest or even) */ + movl %ebx,%ecx + andl $0x000007ff,%ecx + cmpl $0x00000400,%ecx + jc LCheck_truncate_53 /* less than half, no increment needed */ + + jnz LGreater_Half_53 /* greater than half, increment needed */ + + /* Possibly half, we need to check the ls bits */ + orl %edx,%edx + jnz LGreater_Half_53 /* greater than half, increment needed */ + + /* Exactly half, increment only if 53rd bit is 1 (round to even) */ + testl $0x00000800,%ebx + jz LTruncate_53 + +LGreater_Half_53: /* Rounding: increment at the 53rd bit */ +LDo_53_round_up: + movb LOST_UP,FPU_bits_lost + andl $0xfffff800,%ebx /* Truncate to 53 bits */ + addl $0x00000800,%ebx + adcl $0,%eax + jmp LCheck_Round_Overflow + +LCheck_truncate_53: + movl %ebx,%ecx + andl $0x000007ff,%ecx + orl %edx,%ecx + jz LRe_normalise + +LTruncate_53: + movb LOST_DOWN,FPU_bits_lost + andl $0xfffff800,%ebx /* Truncate to 53 bits */ + jmp LRe_normalise + + +/* Round etc to 64 bit precision */ +LRound_To_64: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_64 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_64 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_64 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_64 + +#ifdef PARANOID + jmp L_bugged_round64 +#endif PARANOID + +LUp_64: + cmpb SIGN_POS,SIGN(%edi) + jne LCheck_truncate_64 /* If negative then up==truncate */ + + orl %edx,%edx + jnz LDo_64_round_up + jmp LRe_normalise + +LDown_64: + cmpb SIGN_POS,SIGN(%edi) + je LCheck_truncate_64 /* If positive then down==truncate */ + + orl %edx,%edx + jnz LDo_64_round_up + jmp LRe_normalise + +LRound_nearest_64: + cmpl $0x80000000,%edx + jc LCheck_truncate_64 + + jne LDo_64_round_up + + /* Now test for round-to-even */ + testb $1,%ebx + jz LCheck_truncate_64 + +LDo_64_round_up: + movb LOST_UP,FPU_bits_lost + addl $1,%ebx + adcl $0,%eax + +LCheck_Round_Overflow: + jnc LRe_normalise + + /* Overflow, adjust the result (significand to 1.0) */ + rcrl $1,%eax + rcrl $1,%ebx + incl EXP(%edi) + jmp LRe_normalise + +LCheck_truncate_64: + orl %edx,%edx + jz LRe_normalise + +LTruncate_64: + movb LOST_DOWN,FPU_bits_lost + +LRe_normalise: + testb $0xff,FPU_denormal + jnz xNormalise_result + +xL_Normalised: + cmpb LOST_UP,FPU_bits_lost + je xL_precision_lost_up + + cmpb LOST_DOWN,FPU_bits_lost + je xL_precision_lost_down + +xL_no_precision_loss: + /* store the result */ + movb TW_Valid,TAG(%edi) + +xL_Store_significand: + movl %eax,SIGH(%edi) + movl %ebx,SIGL(%edi) + + xorl %eax,%eax /* No errors detected. */ + + cmpl EXP_OVER,EXP(%edi) + jge L_overflow + +fpu_reg_round_exit: +#ifndef NON_REENTRANT_FPU + popl %ebx /* adjust the stack pointer */ +#endif NON_REENTRANT_FPU + +fpu_Arith_exit: + popl %ebx + popl %edi + popl %esi + leave + ret + + +/* + * Set the FPU status flags to represent precision loss due to + * round-up. + */ +xL_precision_lost_up: + push %eax + call _set_precision_flag_up + popl %eax + jmp xL_no_precision_loss + +/* + * Set the FPU status flags to represent precision loss due to + * truncation. + */ +xL_precision_lost_down: + push %eax + call _set_precision_flag_down + popl %eax + jmp xL_no_precision_loss + + +/* + * The number is a denormal (which might get rounded up to a normal) + * Shift the number right the required number of bits, which will + * have to be undone later... + */ +xMake_denorm: + /* The action to be taken depends upon whether the underflow + exception is masked */ + testb CW_Underflow,%cl /* Underflow mask. */ + jz xUnmasked_underflow /* Do not make a denormal. */ + + movb DENORMAL,FPU_denormal + + pushl %ecx /* Save */ + movl EXP_UNDER+1,%ecx + subl EXP(%edi),%ecx + + cmpl $64,%ecx /* shrd only works for 0..31 bits */ + jnc xDenorm_shift_more_than_63 + + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jnc xDenorm_shift_more_than_32 + +/* + * We got here without jumps by assuming that the most common requirement + * is for a small de-normalising shift. + * Shift by [1..31] bits + */ + addl %ecx,EXP(%edi) + orl %edx,%edx /* extension */ + setne %ch /* Save whether %edx is non-zero */ + xorl %edx,%edx + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orb %ch,%dl + popl %ecx + jmp xDenorm_done + +/* Shift by [32..63] bits */ +xDenorm_shift_more_than_32: + addl %ecx,EXP(%edi) + subb $32,%cl + orl %edx,%edx + setne %ch + orb %ch,%bl + xorl %edx,%edx + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orl %edx,%edx /* test these 32 bits */ + setne %cl + orb %ch,%bl + orb %cl,%bl + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + popl %ecx + jmp xDenorm_done + +/* Shift by [64..) bits */ +xDenorm_shift_more_than_63: + cmpl $64,%ecx + jne xDenorm_shift_more_than_64 + +/* Exactly 64 bit shift */ + addl %ecx,EXP(%edi) + xorl %ecx,%ecx + orl %edx,%edx + setne %cl + orl %ebx,%ebx + setne %ch + orb %ch,%cl + orb %cl,%al + movl %eax,%edx + xorl %eax,%eax + xorl %ebx,%ebx + popl %ecx + jmp xDenorm_done + +xDenorm_shift_more_than_64: + movl EXP_UNDER+1,EXP(%edi) +/* This is easy, %eax must be non-zero, so.. */ + movl $1,%edx + xorl %eax,%eax + xorl %ebx,%ebx + popl %ecx + jmp xDenorm_done + + +xUnmasked_underflow: + movb UNMASKED_UNDERFLOW,FPU_denormal + jmp xDenorm_done + + +/* Undo the de-normalisation. */ +xNormalise_result: + cmpb UNMASKED_UNDERFLOW,FPU_denormal + je xSignal_underflow + +/* The number must be a denormal if we got here. */ +#ifdef PARANOID + /* But check it... just in case. */ + cmpl EXP_UNDER+1,EXP(%edi) + jne L_norm_bugged +#endif PARANOID + +#ifdef PECULIAR_486 + /* + * This implements a special feature of 80486 behaviour. + * Underflow will be signalled even if the number is + * not a denormal after rounding. + * This difference occurs only for masked underflow, and not + * in the unmasked case. + * Actual 80486 behaviour differs from this in some circumstances. + */ + orl %eax,%eax /* ms bits */ + js LNormalise_shift_done /* Will be masked underflow */ +#endif PECULIAR_486 + + orl %eax,%eax /* ms bits */ + js xL_Normalised /* No longer a denormal */ + + jnz LNormalise_shift_up_to_31 /* Shift left 0 - 31 bits */ + + orl %ebx,%ebx + jz L_underflow_to_zero /* The contents are zero */ + +/* Shift left 32 - 63 bits */ + movl %ebx,%eax + xorl %ebx,%ebx + subl $32,EXP(%edi) + +LNormalise_shift_up_to_31: + bsrl %eax,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%ebx,%eax + shl %cl,%ebx + subl %ecx,EXP(%edi) + +LNormalise_shift_done: + testb $0xff,FPU_bits_lost /* bits lost == underflow */ + jz xL_Normalised + + /* There must be a masked underflow */ + push %eax + pushl EX_Underflow + call _exception + popl %eax + popl %eax + jmp xL_Normalised + + +/* + * The operations resulted in a number too small to represent. + * Masked response. + */ +L_underflow_to_zero: + push %eax + call _set_precision_flag_down + popl %eax + + push %eax + pushl EX_Underflow + call _exception + popl %eax + popl %eax + +/* Reduce the exponent to EXP_UNDER */ + movl EXP_UNDER,EXP(%edi) + movb TW_Zero,TAG(%edi) + jmp xL_Store_significand + + +/* The operations resulted in a number too large to represent. */ +L_overflow: + push %edi + call _arith_overflow + pop %edi + jmp fpu_reg_round_exit + + +xSignal_underflow: + /* The number may have been changed to a non-denormal */ + /* by the rounding operations. */ + cmpl EXP_UNDER,EXP(%edi) + jle xDo_unmasked_underflow + + jmp xL_Normalised + +xDo_unmasked_underflow: + /* Increase the exponent by the magic number */ + addl $(3*(1<<13)),EXP(%edi) + push %eax + pushl EX_Underflow + call EXCEPTION + popl %eax + popl %eax + jmp xL_Normalised + + +#ifdef PARANOID +#ifdef PECULIAR_486 +L_bugged_denorm_486: + pushl EX_INTERNAL|0x236 + call EXCEPTION + popl %ebx + jmp L_exception_exit +#else +L_bugged_denorm: + pushl EX_INTERNAL|0x230 + call EXCEPTION + popl %ebx + jmp L_exception_exit +#endif PECULIAR_486 + +L_bugged_round24: + pushl EX_INTERNAL|0x231 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_bugged_round53: + pushl EX_INTERNAL|0x232 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_bugged_round64: + pushl EX_INTERNAL|0x233 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_norm_bugged: + pushl EX_INTERNAL|0x234 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_entry_bugged: + pushl EX_INTERNAL|0x235 + call EXCEPTION + popl %ebx +L_exception_exit: + mov $1,%eax + jmp fpu_reg_round_exit +#endif PARANOID diff --git a/arch/i386/math-emu/reg_u_add.S b/arch/i386/math-emu/reg_u_add.S new file mode 100644 index 000000000..4410f8fd4 --- /dev/null +++ b/arch/i386/math-emu/reg_u_add.S @@ -0,0 +1,189 @@ + .file "reg_u_add.S" +/*---------------------------------------------------------------------------+ + | reg_u_add.S | + | | + | Add two valid (TW_Valid) FPU_REG numbers, of the same sign, and put the | + | result in a destination FPU_REG. | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | Call from C as: | + | void reg_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | + | int control_w) | + | | + +---------------------------------------------------------------------------*/ + +/* + | Kernel addition routine reg_u_add(reg *arg1, reg *arg2, reg *answ). + | Takes two valid reg f.p. numbers (TW_Valid), which are + | treated as unsigned numbers, + | and returns their sum as a TW_Valid or TW_S f.p. number. + | The returned number is normalized. + | Basic checks are performed if PARANOID is defined. + */ + +#include "exception.h" +#include "fpu_asm.h" +#include "control_w.h" + +.text + .align 2,144 +.globl _reg_u_add +_reg_u_add: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* source 1 */ + movl PARAM2,%edi /* source 2 */ + +#ifdef DENORM_OPERAND + cmpl EXP_UNDER,EXP(%esi) + jg xOp1_not_denorm + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit + +xOp1_not_denorm: + cmpl EXP_UNDER,EXP(%edi) + jg xOp2_not_denorm + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit + +xOp2_not_denorm: +#endif DENORM_OPERAND + + movl EXP(%esi),%ecx + subl EXP(%edi),%ecx /* exp1 - exp2 */ + jge L_arg1_larger + + /* num1 is smaller */ + movl SIGL(%esi),%ebx + movl SIGH(%esi),%eax + + movl %edi,%esi + negw %cx + jmp L_accum_loaded + +L_arg1_larger: + /* num1 has larger or equal exponent */ + movl SIGL(%edi),%ebx + movl SIGH(%edi),%eax + +L_accum_loaded: + movl PARAM3,%edi /* destination */ +/* movb SIGN(%esi),%dl + movb %dl,SIGN(%edi) */ /* Copy the sign from the first arg */ + + + movl EXP(%esi),%edx + movl %edx,EXP(%edi) /* Copy exponent to destination */ + + xorl %edx,%edx /* clear the extension */ + +#ifdef PARANOID + testl $0x80000000,%eax + je L_bugged + + testl $0x80000000,SIGH(%esi) + je L_bugged +#endif PARANOID + +/* The number to be shifted is in %eax:%ebx:%edx */ + cmpw $32,%cx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + jmp L_shift_done + +L_more_than_31: + cmpw $64,%cx + jnc L_more_than_63 + + subb $32,%cl + jz L_exactly_32 + + shrd %cl,%eax,%edx + shr %cl,%eax + orl %ebx,%ebx + jz L_more_31_no_low /* none of the lowest bits is set */ + + orl $1,%edx /* record the fact in the extension */ + +L_more_31_no_low: + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_exactly_32: + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_more_than_63: + cmpw $65,%cx + jnc L_more_than_64 + + movl %eax,%edx + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_64: + movl $1,%edx /* The shifted nr always at least one '1' */ + +L_more_63_no_low: + xorl %ebx,%ebx + xorl %eax,%eax + +L_shift_done: + /* Now do the addition */ + addl SIGL(%esi),%ebx + adcl SIGH(%esi),%eax + jnc L_round_the_result + + /* Overflow, adjust the result */ + rcrl $1,%eax + rcrl $1,%ebx + rcrl $1,%edx + jnc L_no_bit_lost + + orl $1,%edx + +L_no_bit_lost: + incl EXP(%edi) + +L_round_the_result: + jmp fpu_reg_round /* Round the result */ + + + +#ifdef PARANOID +/* If we ever get here then we have problems! */ +L_bugged: + pushl EX_INTERNAL|0x201 + call EXCEPTION + pop %ebx + jmp L_exit +#endif PARANOID + + +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret diff --git a/arch/i386/math-emu/reg_u_div.S b/arch/i386/math-emu/reg_u_div.S new file mode 100644 index 000000000..328e9116e --- /dev/null +++ b/arch/i386/math-emu/reg_u_div.S @@ -0,0 +1,477 @@ + .file "reg_u_div.S" +/*---------------------------------------------------------------------------+ + | reg_u_div.S | + | | + | Core division routines | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Kernel for the division routines. | + | | + | void reg_u_div(FPU_REG *a, FPU_REG *a, | + | FPU_REG *dest, unsigned int control_word) | + | | + | Does not compute the destination exponent, but does adjust it. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_asm.h" +#include "control_w.h" + + +/* #define dSIGL(x) (x) */ +/* #define dSIGH(x) 4(x) */ + + +#ifndef NON_REENTRANT_FPU +/* + Local storage on the stack: + Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + Overflow flag: ovfl_flag + */ +#define FPU_accum_3 -4(%ebp) +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) +#define FPU_result_1 -20(%ebp) +#define FPU_result_2 -24(%ebp) +#define FPU_ovfl_flag -28(%ebp) + +#else +.data +/* + Local storage in a static area: + Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + Overflow flag: ovfl_flag + */ + .align 2,0 +FPU_accum_3: + .long 0 +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 +FPU_result_1: + .long 0 +FPU_result_2: + .long 0 +FPU_ovfl_flag: + .byte 0 +#endif NON_REENTRANT_FPU + + +.text + .align 2,144 + +.globl _reg_u_div + +.globl _divide_kernel + +_reg_u_div: + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif NON_REENTRANT_FPU + + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* pointer to num */ + movl PARAM2,%ebx /* pointer to denom */ + movl PARAM3,%edi /* pointer to answer */ + +#ifdef DENORM_OPERAND + movl EXP(%esi),%eax + cmpl EXP_UNDER,%eax + jg xOp1_not_denorm + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit + +xOp1_not_denorm: + movl EXP(%ebx),%eax + cmpl EXP_UNDER,%eax + jg xOp2_not_denorm + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit + +xOp2_not_denorm: +#endif DENORM_OPERAND + +_divide_kernel: +#ifdef PARANOID +/* testl $0x80000000, SIGH(%esi) // Dividend */ +/* je L_bugged */ + testl $0x80000000, SIGH(%ebx) /* Divisor */ + je L_bugged +#endif PARANOID + +/* Check if the divisor can be treated as having just 32 bits */ + cmpl $0,SIGL(%ebx) + jnz L_Full_Division /* Can't do a quick divide */ + +/* We should be able to zip through the division here */ + movl SIGH(%ebx),%ecx /* The divisor */ + movl SIGH(%esi),%edx /* Dividend */ + movl SIGL(%esi),%eax /* Dividend */ + + cmpl %ecx,%edx + setaeb FPU_ovfl_flag /* Keep a record */ + jb L_no_adjust + + subl %ecx,%edx /* Prevent the overflow */ + +L_no_adjust: + /* Divide the 64 bit number by the 32 bit denominator */ + divl %ecx + movl %eax,FPU_result_2 + + /* Work on the remainder of the first division */ + xorl %eax,%eax + divl %ecx + movl %eax,FPU_result_1 + + /* Work on the remainder of the 64 bit division */ + xorl %eax,%eax + divl %ecx + + testb $255,FPU_ovfl_flag /* was the num > denom ? */ + je L_no_overflow + + /* Do the shifting here */ + /* increase the exponent */ + incl EXP(%edi) + + /* shift the mantissa right one bit */ + stc /* To set the ms bit */ + rcrl FPU_result_2 + rcrl FPU_result_1 + rcrl %eax + +L_no_overflow: + jmp LRound_precision /* Do the rounding as required */ + + +/*---------------------------------------------------------------------------+ + | Divide: Return arg1/arg2 to arg3. | + | | + | This routine does not use the exponents of arg1 and arg2, but does | + | adjust the exponent of arg3. | + | | + | The maximum returned value is (ignoring exponents) | + | .ffffffff ffffffff | + | ------------------ = 1.ffffffff fffffffe | + | .80000000 00000000 | + | and the minimum is | + | .80000000 00000000 | + | ------------------ = .80000000 00000001 (rounded) | + | .ffffffff ffffffff | + | | + +---------------------------------------------------------------------------*/ + + +L_Full_Division: + /* Save extended dividend in local register */ + movl SIGL(%esi),%eax + movl %eax,FPU_accum_2 + movl SIGH(%esi),%eax + movl %eax,FPU_accum_3 + xorl %eax,%eax + movl %eax,FPU_accum_1 /* zero the extension */ + movl %eax,FPU_accum_0 /* zero the extension */ + + movl SIGL(%esi),%eax /* Get the current num */ + movl SIGH(%esi),%edx + +/*----------------------------------------------------------------------*/ +/* Initialization done. + Do the first 32 bits. */ + + movb $0,FPU_ovfl_flag + cmpl SIGH(%ebx),%edx /* Test for imminent overflow */ + jb LLess_than_1 + ja LGreater_than_1 + + cmpl SIGL(%ebx),%eax + jb LLess_than_1 + +LGreater_than_1: +/* The dividend is greater or equal, would cause overflow */ + setaeb FPU_ovfl_flag /* Keep a record */ + + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx /* Prevent the overflow */ + movl %eax,FPU_accum_2 + movl %edx,FPU_accum_3 + +LLess_than_1: +/* At this point, we have a dividend < divisor, with a record of + adjustment in FPU_ovfl_flag */ + + /* We will divide by a number which is too large */ + movl SIGH(%ebx),%ecx + addl $1,%ecx + jnc LFirst_div_not_1 + + /* here we need to divide by 100000000h, + i.e., no division at all.. */ + mov %edx,%eax + jmp LFirst_div_done + +LFirst_div_not_1: + divl %ecx /* Divide the numerator by the augmented + denom ms dw */ + +LFirst_div_done: + movl %eax,FPU_result_2 /* Put the result in the answer */ + + mull SIGH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_2 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_3 + + movl FPU_result_2,%eax /* Get the result back */ + mull SIGL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + sbbl $0,FPU_accum_3 + je LDo_2nd_32_bits /* Must check for non-zero result here */ + +#ifdef PARANOID + jb L_bugged_1 +#endif PARANOID + + /* need to subtract another once of the denom */ + incl FPU_result_2 /* Correct the answer */ + + movl SIGL(%ebx),%eax + movl SIGH(%ebx),%edx + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + sbbl $0,FPU_accum_3 + jne L_bugged_1 /* Must check for non-zero result here */ +#endif PARANOID + +/*----------------------------------------------------------------------*/ +/* Half of the main problem is done, there is just a reduced numerator + to handle now. + Work with the second 32 bits, FPU_accum_0 not used from now on */ +LDo_2nd_32_bits: + movl FPU_accum_2,%edx /* get the reduced num */ + movl FPU_accum_1,%eax + + /* need to check for possible subsequent overflow */ + cmpl SIGH(%ebx),%edx + jb LDo_2nd_div + ja LPrevent_2nd_overflow + + cmpl SIGL(%ebx),%eax + jb LDo_2nd_div + +LPrevent_2nd_overflow: +/* The numerator is greater or equal, would cause overflow */ + /* prevent overflow */ + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx + movl %edx,FPU_accum_2 + movl %eax,FPU_accum_1 + + incl FPU_result_2 /* Reflect the subtraction in the answer */ + +#ifdef PARANOID + je L_bugged_2 /* Can't bump the result to 1.0 */ +#endif PARANOID + +LDo_2nd_div: + cmpl $0,%ecx /* augmented denom msw */ + jnz LSecond_div_not_1 + + /* %ecx == 0, we are dividing by 1.0 */ + mov %edx,%eax + jmp LSecond_div_done + +LSecond_div_not_1: + divl %ecx /* Divide the numerator by the denom ms dw */ + +LSecond_div_done: + movl %eax,FPU_result_1 /* Put the result in the answer */ + + mull SIGH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif PARANOID + + movl FPU_result_1,%eax /* Get the result back */ + mull SIGL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif PARANOID + + jz LDo_3rd_32_bits + +#ifdef PARANOID + cmpl $1,FPU_accum_2 + jne L_bugged_2 +#endif PARANOID + + /* need to subtract another once of the denom */ + movl SIGL(%ebx),%eax + movl SIGH(%ebx),%edx + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 + jne L_bugged_2 +#endif PARANOID + + addl $1,FPU_result_1 /* Correct the answer */ + adcl $0,FPU_result_2 + +#ifdef PARANOID + jc L_bugged_2 /* Must check for non-zero result here */ +#endif PARANOID + +/*----------------------------------------------------------------------*/ +/* The division is essentially finished here, we just need to perform + tidying operations. + Deal with the 3rd 32 bits */ +LDo_3rd_32_bits: + movl FPU_accum_1,%edx /* get the reduced num */ + movl FPU_accum_0,%eax + + /* need to check for possible subsequent overflow */ + cmpl SIGH(%ebx),%edx /* denom */ + jb LRound_prep + ja LPrevent_3rd_overflow + + cmpl SIGL(%ebx),%eax /* denom */ + jb LRound_prep + +LPrevent_3rd_overflow: + /* prevent overflow */ + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx + movl %edx,FPU_accum_1 + movl %eax,FPU_accum_0 + + addl $1,FPU_result_1 /* Reflect the subtraction in the answer */ + adcl $0,FPU_result_2 + jne LRound_prep + jnc LRound_prep + + /* This is a tricky spot, there is an overflow of the answer */ + movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */ + +LRound_prep: +/* + * Prepare for rounding. + * To test for rounding, we just need to compare 2*accum with the + * denom. + */ + movl FPU_accum_0,%ecx + movl FPU_accum_1,%edx + movl %ecx,%eax + orl %edx,%eax + jz LRound_ovfl /* The accumulator contains zero. */ + + /* Multiply by 2 */ + clc + rcll $1,%ecx + rcll $1,%edx + jc LRound_large /* No need to compare, denom smaller */ + + subl SIGL(%ebx),%ecx + sbbl SIGH(%ebx),%edx + jnc LRound_not_small + + movl $0x70000000,%eax /* Denom was larger */ + jmp LRound_ovfl + +LRound_not_small: + jnz LRound_large + + movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */ + jmp LRound_ovfl + +LRound_large: + movl $0xff000000,%eax /* Denom was smaller */ + +LRound_ovfl: +/* We are now ready to deal with rounding, but first we must get + the bits properly aligned */ + testb $255,FPU_ovfl_flag /* was the num > denom ? */ + je LRound_precision + + incl EXP(%edi) + + /* shift the mantissa right one bit */ + stc /* Will set the ms bit */ + rcrl FPU_result_2 + rcrl FPU_result_1 + rcrl %eax + +/* Round the result as required */ +LRound_precision: + decl EXP(%edi) /* binary point between 1st & 2nd bits */ + + movl %eax,%edx + movl FPU_result_1,%ebx + movl FPU_result_2,%eax + jmp fpu_reg_round + + +#ifdef PARANOID +/* The logic is wrong if we got here */ +L_bugged: + pushl EX_INTERNAL|0x202 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_1: + pushl EX_INTERNAL|0x203 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_2: + pushl EX_INTERNAL|0x204 + call EXCEPTION + pop %ebx + jmp L_exit + +L_exit: + popl %ebx + popl %edi + popl %esi + + leave + ret +#endif PARANOID diff --git a/arch/i386/math-emu/reg_u_mul.S b/arch/i386/math-emu/reg_u_mul.S new file mode 100644 index 000000000..8250666bd --- /dev/null +++ b/arch/i386/math-emu/reg_u_mul.S @@ -0,0 +1,163 @@ + .file "reg_u_mul.S" +/*---------------------------------------------------------------------------+ + | reg_u_mul.S | + | | + | Core multiplication routine | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Basic multiplication routine. | + | Does not check the resulting exponent for overflow/underflow | + | | + | reg_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); | + | | + | Internal working is at approx 128 bits. | + | Result is rounded to nearest 53 or 64 bits, using "nearest or even". | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_asm.h" +#include "control_w.h" + + + +#ifndef NON_REENTRANT_FPU +/* Local storage on the stack: */ +#define FPU_accum_0 -4(%ebp) /* ms word */ +#define FPU_accum_1 -8(%ebp) + +#else +/* Local storage in a static area: */ +.data + .align 4,0 +FPU_accum_0: + .long 0 +FPU_accum_1: + .long 0 +#endif NON_REENTRANT_FPU + + +.text + .align 2,144 + +.globl _reg_u_mul +_reg_u_mul: + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $8,%esp +#endif NON_REENTRANT_FPU + + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi + movl PARAM2,%edi + +#ifdef PARANOID + testl $0x80000000,SIGH(%esi) + jz L_bugged + testl $0x80000000,SIGH(%edi) + jz L_bugged +#endif PARANOID + +#ifdef DENORM_OPERAND + movl EXP(%esi),%eax + cmpl EXP_UNDER,%eax + jg xOp1_not_denorm + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit + +xOp1_not_denorm: + movl EXP(%edi),%eax + cmpl EXP_UNDER,%eax + jg xOp2_not_denorm + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit + +xOp2_not_denorm: +#endif DENORM_OPERAND + + xorl %ecx,%ecx + xorl %ebx,%ebx + + movl SIGL(%esi),%eax + mull SIGL(%edi) + movl %eax,FPU_accum_0 + movl %edx,FPU_accum_1 + + movl SIGL(%esi),%eax + mull SIGH(%edi) + addl %eax,FPU_accum_1 + adcl %edx,%ebx +/* adcl $0,%ecx // overflow here is not possible */ + + movl SIGH(%esi),%eax + mull SIGL(%edi) + addl %eax,FPU_accum_1 + adcl %edx,%ebx + adcl $0,%ecx + + movl SIGH(%esi),%eax + mull SIGH(%edi) + addl %eax,%ebx + adcl %edx,%ecx + + movl EXP(%esi),%eax /* Compute the exponent */ + addl EXP(%edi),%eax + subl EXP_BIAS-1,%eax + +/* Have now finished with the sources */ + movl PARAM3,%edi /* Point to the destination */ + movl %eax,EXP(%edi) + +/* Now make sure that the result is normalized */ + testl $0x80000000,%ecx + jnz LResult_Normalised + + /* Normalize by shifting left one bit */ + shll $1,FPU_accum_0 + rcll $1,FPU_accum_1 + rcll $1,%ebx + rcll $1,%ecx + decl EXP(%edi) + +LResult_Normalised: + movl FPU_accum_0,%eax + movl FPU_accum_1,%edx + orl %eax,%eax + jz L_extent_zero + + orl $1,%edx + +L_extent_zero: + movl %ecx,%eax + jmp fpu_reg_round + + +#ifdef PARANOID +L_bugged: + pushl EX_INTERNAL|0x205 + call EXCEPTION + pop %ebx + jmp L_exit + +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret +#endif PARANOID + diff --git a/arch/i386/math-emu/reg_u_sub.S b/arch/i386/math-emu/reg_u_sub.S new file mode 100644 index 000000000..fbec17dfb --- /dev/null +++ b/arch/i386/math-emu/reg_u_sub.S @@ -0,0 +1,292 @@ + .file "reg_u_sub.S" +/*---------------------------------------------------------------------------+ + | reg_u_sub.S | + | | + | Core floating point subtraction routine. | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | Call from C as: | + | void reg_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | + | int control_w) | + | | + +---------------------------------------------------------------------------*/ + +/* + | Kernel subtraction routine reg_u_sub(reg *arg1, reg *arg2, reg *answ). + | Takes two valid reg f.p. numbers (TW_Valid), which are + | treated as unsigned numbers, + | and returns their difference as a TW_Valid or TW_Zero f.p. + | number. + | The first number (arg1) must be the larger. + | The returned number is normalized. + | Basic checks are performed if PARANOID is defined. + */ + +#include "exception.h" +#include "fpu_asm.h" +#include "control_w.h" + +.text + .align 2,144 +.globl _reg_u_sub +_reg_u_sub: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* source 1 */ + movl PARAM2,%edi /* source 2 */ + +#ifdef DENORM_OPERAND + cmpl EXP_UNDER,EXP(%esi) + jg xOp1_not_denorm + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit + +xOp1_not_denorm: + cmpl EXP_UNDER,EXP(%edi) + jg xOp2_not_denorm + + call _denormal_operand + orl %eax,%eax + jnz fpu_Arith_exit + +xOp2_not_denorm: +#endif DENORM_OPERAND + + movl EXP(%esi),%ecx + subl EXP(%edi),%ecx /* exp1 - exp2 */ + +#ifdef PARANOID + /* source 2 is always smaller than source 1 */ + js L_bugged_1 + + testl $0x80000000,SIGH(%edi) /* The args are assumed to be be normalized */ + je L_bugged_2 + + testl $0x80000000,SIGH(%esi) + je L_bugged_2 +#endif PARANOID + +/*--------------------------------------+ + | Form a register holding the | + | smaller number | + +--------------------------------------*/ + movl SIGH(%edi),%eax /* register ms word */ + movl SIGL(%edi),%ebx /* register ls word */ + + movl PARAM3,%edi /* destination */ + movl EXP(%esi),%edx + movl %edx,EXP(%edi) /* Copy exponent to destination */ +/* movb SIGN(%esi),%dl + movb %dl,SIGN(%edi) */ /* Copy the sign from the first arg */ + + xorl %edx,%edx /* register extension */ + +/*--------------------------------------+ + | Shift the temporary register | + | right the required number of | + | places. | + +--------------------------------------*/ +L_shift_r: + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + jmp L_shift_done + +L_more_than_31: + cmpl $64,%ecx + jnc L_more_than_63 + + subb $32,%cl + jz L_exactly_32 + + shrd %cl,%eax,%edx + shr %cl,%eax + orl %ebx,%ebx + jz L_more_31_no_low /* none of the lowest bits is set */ + + orl $1,%edx /* record the fact in the extension */ + +L_more_31_no_low: + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_exactly_32: + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_more_than_63: + cmpw $65,%cx + jnc L_more_than_64 + + /* Shift right by 64 bits */ + movl %eax,%edx + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_64: + jne L_more_than_65 + + /* Shift right by 65 bits */ + /* Carry is clear if we get here */ + movl %eax,%edx + rcrl %edx + jnc L_shift_65_nc + + orl $1,%edx + jmp L_more_63_no_low + +L_shift_65_nc: + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_65: + movl $1,%edx /* The shifted nr always at least one '1' */ + +L_more_63_no_low: + xorl %ebx,%ebx + xorl %eax,%eax + +L_shift_done: +L_subtr: +/*------------------------------+ + | Do the subtraction | + +------------------------------*/ + xorl %ecx,%ecx + subl %edx,%ecx + movl %ecx,%edx + movl SIGL(%esi),%ecx + sbbl %ebx,%ecx + movl %ecx,%ebx + movl SIGH(%esi),%ecx + sbbl %eax,%ecx + movl %ecx,%eax + +#ifdef PARANOID + /* We can never get a borrow */ + jc L_bugged +#endif PARANOID + +/*--------------------------------------+ + | Normalize the result | + +--------------------------------------*/ + testl $0x80000000,%eax + jnz L_round /* no shifting needed */ + + orl %eax,%eax + jnz L_shift_1 /* shift left 1 - 31 bits */ + + orl %ebx,%ebx + jnz L_shift_32 /* shift left 32 - 63 bits */ + +/* + * A rare case, the only one which is non-zero if we got here + * is: 1000000 .... 0000 + * -0111111 .... 1111 1 + * -------------------- + * 0000000 .... 0000 1 + */ + + cmpl $0x80000000,%edx + jnz L_must_be_zero + + /* Shift left 64 bits */ + subl $64,EXP(%edi) + xchg %edx,%eax + jmp fpu_reg_round + +L_must_be_zero: +#ifdef PARANOID + orl %edx,%edx + jnz L_bugged_3 +#endif PARANOID + + /* The result is zero */ + movb TW_Zero,TAG(%edi) + movl $0,EXP(%edi) /* exponent */ + movl $0,SIGL(%edi) + movl $0,SIGH(%edi) + jmp L_exit /* %eax contains zero */ + +L_shift_32: + movl %ebx,%eax + movl %edx,%ebx + movl $0,%edx + subl $32,EXP(%edi) /* Can get underflow here */ + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %eax,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%ebx,%eax + shld %cl,%edx,%ebx + shl %cl,%edx + subl %ecx,EXP(%edi) /* Can get underflow here */ + +L_round: + jmp fpu_reg_round /* Round the result */ + + +#ifdef PARANOID +L_bugged_1: + pushl EX_INTERNAL|0x206 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_2: + pushl EX_INTERNAL|0x209 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_3: + pushl EX_INTERNAL|0x210 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_4: + pushl EX_INTERNAL|0x211 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged: + pushl EX_INTERNAL|0x212 + call EXCEPTION + pop %ebx + jmp L_error_exit +#endif PARANOID + + +L_error_exit: + movl $1,%eax +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret diff --git a/arch/i386/math-emu/round_Xsig.S b/arch/i386/math-emu/round_Xsig.S new file mode 100644 index 000000000..163755878 --- /dev/null +++ b/arch/i386/math-emu/round_Xsig.S @@ -0,0 +1,148 @@ +/*---------------------------------------------------------------------------+ + | round_Xsig.S | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | Normalize and round a 12 byte quantity. | + | Call from C as: | + | int round_Xsig(Xsig *n) | + | | + | Normalize a 12 byte quantity. | + | Call from C as: | + | int norm_Xsig(Xsig *n) | + | | + | Each function returns the size of the shift (nr of bits). | + | | + +---------------------------------------------------------------------------*/ + .file "round_Xsig.S" + +#include "fpu_asm.h" + + +.text + + .align 2,144 +.globl _round_Xsig + +_round_Xsig: + pushl %ebp + movl %esp,%ebp + pushl %ebx /* Reserve some space */ + pushl %ebx + pushl %esi + + movl PARAM1,%esi + + movl 8(%esi),%edx + movl 4(%esi),%ebx + movl (%esi),%eax + + movl $0,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_round /* Already normalized */ + jnz L_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + movl $-32,-4(%ebp) + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + subl %ecx,-4(%ebp) + shld %cl,%ebx,%edx + shld %cl,%eax,%ebx + shl %cl,%eax + +L_round: + testl $0x80000000,%eax + jz L_exit + + addl $1,%ebx + adcl $0,%edx + jnz L_exit + + movl $0x80000000,%edx + incl -4(%ebp) + +L_exit: + movl %edx,8(%esi) + movl %ebx,4(%esi) + movl %eax,(%esi) + + movl -4(%ebp),%eax + + popl %esi + popl %ebx + leave + ret + + + + + .align 2,144 +.globl _norm_Xsig + +_norm_Xsig: + pushl %ebp + movl %esp,%ebp + pushl %ebx /* Reserve some space */ + pushl %ebx + pushl %esi + + movl PARAM1,%esi + + movl 8(%esi),%edx + movl 4(%esi),%ebx + movl (%esi),%eax + + movl $0,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_n_exit /* Already normalized */ + jnz L_n_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + movl $-32,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_n_exit /* Normalized now */ + jnz L_n_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + addl $-32,-4(%ebp) + jmp L_n_exit /* Might not be normalized, + but shift no more. */ + +/* We need to shift left by 1 - 31 bits */ +L_n_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + subl %ecx,-4(%ebp) + shld %cl,%ebx,%edx + shld %cl,%eax,%ebx + shl %cl,%eax + +L_n_exit: + movl %edx,8(%esi) + movl %ebx,4(%esi) + movl %eax,(%esi) + + movl -4(%ebp),%eax + + popl %esi + popl %ebx + leave + ret + diff --git a/arch/i386/math-emu/shr_Xsig.S b/arch/i386/math-emu/shr_Xsig.S new file mode 100644 index 000000000..d6724a204 --- /dev/null +++ b/arch/i386/math-emu/shr_Xsig.S @@ -0,0 +1,90 @@ + .file "shr_Xsig.S" +/*---------------------------------------------------------------------------+ + | shr_Xsig.S | + | | + | 12 byte right shift function | + | | + | Copyright (C) 1992,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | Call from C as: | + | void shr_Xsig(Xsig *arg, unsigned nr) | + | | + | Extended shift right function. | + | Fastest for small shifts. | + | Shifts the 12 byte quantity pointed to by the first arg (arg) | + | right by the number of bits specified by the second arg (nr). | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_asm.h" + +.text + .align 2,144 + + .globl _shr_Xsig +_shr_Xsig: + push %ebp + movl %esp,%ebp + pushl %esi + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + pushl %ebx + movl (%esi),%eax /* lsl */ + movl 4(%esi),%ebx /* midl */ + movl 8(%esi),%edx /* msl */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edx,8(%esi) + popl %ebx + popl %esi + leave + ret + +L_more_than_31: + cmpl $64,%ecx + jnc L_more_than_63 + + subb $32,%cl + movl 4(%esi),%eax /* midl */ + movl 8(%esi),%edx /* msl */ + shrd %cl,%edx,%eax + shr %cl,%edx + movl %eax,(%esi) + movl %edx,4(%esi) + movl $0,8(%esi) + popl %esi + leave + ret + +L_more_than_63: + cmpl $96,%ecx + jnc L_more_than_95 + + subb $64,%cl + movl 8(%esi),%eax /* msl */ + shr %cl,%eax + xorl %edx,%edx + movl %eax,(%esi) + movl %edx,4(%esi) + movl %edx,8(%esi) + popl %esi + leave + ret + +L_more_than_95: + xorl %eax,%eax + movl %eax,(%esi) + movl %eax,4(%esi) + movl %eax,8(%esi) + popl %esi + leave + ret diff --git a/arch/i386/math-emu/status_w.h b/arch/i386/math-emu/status_w.h new file mode 100644 index 000000000..96607d0e1 --- /dev/null +++ b/arch/i386/math-emu/status_w.h @@ -0,0 +1,65 @@ +/*---------------------------------------------------------------------------+ + | status_w.h | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _STATUS_H_ +#define _STATUS_H_ + +#include "fpu_emu.h" /* for definition of PECULIAR_486 */ + +#ifdef __ASSEMBLER__ +#define Const__(x) $##x +#else +#define Const__(x) x +#endif + +#define SW_Backward Const__(0x8000) /* backward compatibility */ +#define SW_C3 Const__(0x4000) /* condition bit 3 */ +#define SW_Top Const__(0x3800) /* top of stack */ +#define SW_Top_Shift Const__(11) /* shift for top of stack bits */ +#define SW_C2 Const__(0x0400) /* condition bit 2 */ +#define SW_C1 Const__(0x0200) /* condition bit 1 */ +#define SW_C0 Const__(0x0100) /* condition bit 0 */ +#define SW_Summary Const__(0x0080) /* exception summary */ +#define SW_Stack_Fault Const__(0x0040) /* stack fault */ +#define SW_Precision Const__(0x0020) /* loss of precision */ +#define SW_Underflow Const__(0x0010) /* underflow */ +#define SW_Overflow Const__(0x0008) /* overflow */ +#define SW_Zero_Div Const__(0x0004) /* divide by zero */ +#define SW_Denorm_Op Const__(0x0002) /* denormalized operand */ +#define SW_Invalid Const__(0x0001) /* invalid operation */ + +#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */ + +#ifndef __ASSEMBLER__ + +#define COMP_A_gt_B 1 +#define COMP_A_eq_B 2 +#define COMP_A_lt_B 3 +#define COMP_No_Comp 4 +#define COMP_Denormal 0x20 +#define COMP_NaN 0x40 +#define COMP_SNaN 0x80 + +#define status_word() \ + ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top)) +#define setcc(cc) ({ \ + partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); \ + partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); }) + +#ifdef PECULIAR_486 + /* Default, this conveys no information, but an 80486 does it. */ + /* Clear the SW_C1 bit, "other bits undefined". */ +# define clear_C1() { partial_status &= ~SW_C1; } +# else +# define clear_C1() +#endif PECULIAR_486 + +#endif __ASSEMBLER__ + +#endif _STATUS_H_ diff --git a/arch/i386/math-emu/version.h b/arch/i386/math-emu/version.h new file mode 100644 index 000000000..4c75a4792 --- /dev/null +++ b/arch/i386/math-emu/version.h @@ -0,0 +1,12 @@ +/*---------------------------------------------------------------------------+ + | version.h | + | | + | | + | Copyright (C) 1992,1993,1994 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#define FPU_VERSION "wm-FPU-emu version 1.20" diff --git a/arch/i386/math-emu/wm_shrx.S b/arch/i386/math-emu/wm_shrx.S new file mode 100644 index 000000000..bef0e1963 --- /dev/null +++ b/arch/i386/math-emu/wm_shrx.S @@ -0,0 +1,208 @@ + .file "wm_shrx.S" +/*---------------------------------------------------------------------------+ + | wm_shrx.S | + | | + | 64 bit right shift functions | + | | + | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | Call from C as: | + | unsigned shrx(void *arg1, unsigned arg2) | + | and | + | unsigned shrxs(void *arg1, unsigned arg2) | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_asm.h" + +.text + .align 2,144 + +/*---------------------------------------------------------------------------+ + | unsigned shrx(void *arg1, unsigned arg2) | + | | + | Extended shift right function. | + | Fastest for small shifts. | + | Shifts the 64 bit quantity pointed to by the first arg (arg1) | + | right by the number of bits specified by the second arg (arg2). | + | Forms a 96 bit quantity from the 64 bit arg and eax: | + | [ 64 bit arg ][ eax ] | + | shift right ---------> | + | The eax register is initialized to 0 before the shifting. | + | Results returned in the 64 bit arg and eax. | + +---------------------------------------------------------------------------*/ + + .globl _shrx + +_shrx: + push %ebp + movl %esp,%ebp + pushl %esi + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + pushl %ebx + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %eax,%eax /* extension */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + movl %ebx,(%esi) + movl %edx,4(%esi) + popl %ebx + popl %esi + leave + ret + +L_more_than_31: + cmpl $64,%ecx + jnc L_more_than_63 + + subb $32,%cl + movl (%esi),%eax /* lsl */ + movl 4(%esi),%edx /* msl */ + shrd %cl,%edx,%eax + shr %cl,%edx + movl %edx,(%esi) + movl $0,4(%esi) + popl %esi + leave + ret + +L_more_than_63: + cmpl $96,%ecx + jnc L_more_than_95 + + subb $64,%cl + movl 4(%esi),%eax /* msl */ + shr %cl,%eax + xorl %edx,%edx + movl %edx,(%esi) + movl %edx,4(%esi) + popl %esi + leave + ret + +L_more_than_95: + xorl %eax,%eax + movl %eax,(%esi) + movl %eax,4(%esi) + popl %esi + leave + ret + + +/*---------------------------------------------------------------------------+ + | unsigned shrxs(void *arg1, unsigned arg2) | + | | + | Extended shift right function (optimized for small floating point | + | integers). | + | Shifts the 64 bit quantity pointed to by the first arg (arg1) | + | right by the number of bits specified by the second arg (arg2). | + | Forms a 96 bit quantity from the 64 bit arg and eax: | + | [ 64 bit arg ][ eax ] | + | shift right ---------> | + | The eax register is initialized to 0 before the shifting. | + | The lower 8 bits of eax are lost and replaced by a flag which is | + | set (to 0x01) if any bit, apart from the first one, is set in the | + | part which has been shifted out of the arg. | + | Results returned in the 64 bit arg and eax. | + +---------------------------------------------------------------------------*/ + .globl _shrxs +_shrxs: + push %ebp + movl %esp,%ebp + pushl %esi + pushl %ebx + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $64,%ecx /* shrd only works for 0..31 bits */ + jnc Ls_more_than_63 + + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jc Ls_less_than_32 + +/* We got here without jumps by assuming that the most common requirement + is for small integers */ +/* Shift by [32..63] bits */ + subb $32,%cl + movl (%esi),%eax /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %ebx,%ebx + shrd %cl,%eax,%ebx + shrd %cl,%edx,%eax + shr %cl,%edx + orl %ebx,%ebx /* test these 32 bits */ + setne %bl + test $0x7fffffff,%eax /* and 31 bits here */ + setne %bh + orw %bx,%bx /* Any of the 63 bit set ? */ + setne %al + movl %edx,(%esi) + movl $0,4(%esi) + popl %ebx + popl %esi + leave + ret + +/* Shift by [0..31] bits */ +Ls_less_than_32: + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %eax,%eax /* extension */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + test $0x7fffffff,%eax /* only need to look at eax here */ + setne %al + movl %ebx,(%esi) + movl %edx,4(%esi) + popl %ebx + popl %esi + leave + ret + +/* Shift by [64..95] bits */ +Ls_more_than_63: + cmpl $96,%ecx + jnc Ls_more_than_95 + + subb $64,%cl + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%eax /* msl */ + xorl %edx,%edx /* extension */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orl %ebx,%edx + setne %bl + test $0x7fffffff,%eax /* only need to look at eax here */ + setne %bh + orw %bx,%bx + setne %al + xorl %edx,%edx + movl %edx,(%esi) /* set to zero */ + movl %edx,4(%esi) /* set to zero */ + popl %ebx + popl %esi + leave + ret + +Ls_more_than_95: +/* Shift by [96..inf) bits */ + xorl %eax,%eax + movl (%esi),%ebx + orl 4(%esi),%ebx + setne %al + xorl %ebx,%ebx + movl %ebx,(%esi) + movl %ebx,4(%esi) + popl %ebx + popl %esi + leave + ret diff --git a/arch/i386/math-emu/wm_sqrt.S b/arch/i386/math-emu/wm_sqrt.S new file mode 100644 index 000000000..4e028cb80 --- /dev/null +++ b/arch/i386/math-emu/wm_sqrt.S @@ -0,0 +1,474 @@ + .file "wm_sqrt.S" +/*---------------------------------------------------------------------------+ + | wm_sqrt.S | + | | + | Fixed point arithmetic square root evaluation. | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + | Call from C as: | + | void wm_sqrt(FPU_REG *n, unsigned int control_word) | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | wm_sqrt(FPU_REG *n, unsigned int control_word) | + | returns the square root of n in n. | + | | + | Use Newton's method to compute the square root of a number, which must | + | be in the range [1.0 .. 4.0), to 64 bits accuracy. | + | Does not check the sign or tag of the argument. | + | Sets the exponent, but not the sign or tag of the result. | + | | + | The guess is kept in %esi:%edi | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_asm.h" + + +#ifndef NON_REENTRANT_FPU +/* Local storage on the stack: */ +#define FPU_accum_3 -4(%ebp) /* ms word */ +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) + +/* + * The de-normalised argument: + * sq_2 sq_1 sq_0 + * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 + * ^ binary point here + */ +#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ +#define FPU_fsqrt_arg_1 -24(%ebp) +#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ + +#else +/* Local storage in a static area: */ +.data + .align 4,0 +FPU_accum_3: + .long 0 /* ms word */ +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 + +/* The de-normalised argument: + sq_2 sq_1 sq_0 + b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 + ^ binary point here + */ +FPU_fsqrt_arg_2: + .long 0 /* ms word */ +FPU_fsqrt_arg_1: + .long 0 +FPU_fsqrt_arg_0: + .long 0 /* ls word, at most the ms bit is set */ +#endif NON_REENTRANT_FPU + + +.text + .align 2,144 + +.globl _wm_sqrt +_wm_sqrt: + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif NON_REENTRANT_FPU + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi + + movl SIGH(%esi),%eax + movl SIGL(%esi),%ecx + xorl %edx,%edx + +/* We use a rough linear estimate for the first guess.. */ + + cmpl EXP_BIAS,EXP(%esi) + jnz sqrt_arg_ge_2 + + shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ + rcrl $1,%ecx + rcrl $1,%edx + +sqrt_arg_ge_2: +/* From here on, n is never accessed directly again until it is + replaced by the answer. */ + + movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ + movl %ecx,FPU_fsqrt_arg_1 + movl %edx,FPU_fsqrt_arg_0 + +/* Make a linear first estimate */ + shrl $1,%eax + addl $0x40000000,%eax + movl $0xaaaaaaaa,%ecx + mull %ecx + shll %edx /* max result was 7fff... */ + testl $0x80000000,%edx /* but min was 3fff... */ + jnz sqrt_prelim_no_adjust + + movl $0x80000000,%edx /* round up */ + +sqrt_prelim_no_adjust: + movl %edx,%esi /* Our first guess */ + +/* We have now computed (approx) (2 + x) / 3, which forms the basis + for a few iterations of Newton's method */ + + movl FPU_fsqrt_arg_2,%ecx /* ms word */ + +/* + * From our initial estimate, three iterations are enough to get us + * to 30 bits or so. This will then allow two iterations at better + * precision to complete the process. + */ + +/* Compute (g + n/g)/2 at each iteration (g is the guess). */ + shrl %ecx /* Doing this first will prevent a divide */ + /* overflow later. */ + + movl %ecx,%edx /* msw of the arg / 2 */ + divl %esi /* current estimate */ + shrl %esi /* divide by 2 */ + addl %eax,%esi /* the new estimate */ + + movl %ecx,%edx + divl %esi + shrl %esi + addl %eax,%esi + + movl %ecx,%edx + divl %esi + shrl %esi + addl %eax,%esi + +/* + * Now that an estimate accurate to about 30 bits has been obtained (in %esi), + * we improve it to 60 bits or so. + * + * The strategy from now on is to compute new estimates from + * guess := guess + (n - guess^2) / (2 * guess) + */ + +/* First, find the square of the guess */ + movl %esi,%eax + mull %esi +/* guess^2 now in %edx:%eax */ + + movl FPU_fsqrt_arg_1,%ecx + subl %ecx,%eax + movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ + sbbl %ecx,%edx + jnc sqrt_stage_2_positive + +/* Subtraction gives a negative result, + negate the result before division. */ + notl %edx + notl %eax + addl $1,%eax + adcl $0,%edx + + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + jmp sqrt_stage_2_finish + +sqrt_stage_2_positive: + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + notl %ecx + notl %eax + addl $1,%eax + adcl $0,%ecx + +sqrt_stage_2_finish: + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* Form the new estimate in %esi:%edi */ + movl %eax,%edi + addl %ecx,%esi + + jnz sqrt_stage_2_done /* result should be [1..2) */ + +#ifdef PARANOID +/* It should be possible to get here only if the arg is ffff....ffff */ + cmp $0xffffffff,FPU_fsqrt_arg_1 + jnz sqrt_stage_2_error +#endif PARANOID + +/* The best rounded result. */ + xorl %eax,%eax + decl %eax + movl %eax,%edi + movl %eax,%esi + movl $0x7fffffff,%eax + jmp sqrt_round_result + +#ifdef PARANOID +sqrt_stage_2_error: + pushl EX_INTERNAL|0x213 + call EXCEPTION +#endif PARANOID + +sqrt_stage_2_done: + +/* Now the square root has been computed to better than 60 bits. */ + +/* Find the square of the guess. */ + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,FPU_accum_1 + + movl %esi,%eax + mull %esi + movl %edx,FPU_accum_3 + movl %eax,FPU_accum_2 + + movl %edi,%eax + mull %esi + addl %eax,FPU_accum_1 + adcl %edx,FPU_accum_2 + adcl $0,FPU_accum_3 + +/* movl %esi,%eax */ +/* mull %edi */ + addl %eax,FPU_accum_1 + adcl %edx,FPU_accum_2 + adcl $0,FPU_accum_3 + +/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ + + movl FPU_fsqrt_arg_0,%eax /* get normalized n */ + subl %eax,FPU_accum_1 + movl FPU_fsqrt_arg_1,%eax + sbbl %eax,FPU_accum_2 + movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ + sbbl %eax,FPU_accum_3 + jnc sqrt_stage_3_positive + +/* Subtraction gives a negative result, + negate the result before division */ + notl FPU_accum_1 + notl FPU_accum_2 + notl FPU_accum_3 + addl $1,FPU_accum_1 + adcl $0,FPU_accum_2 + +#ifdef PARANOID + adcl $0,FPU_accum_3 /* This must be zero */ + jz sqrt_stage_3_no_error + +sqrt_stage_3_error: + pushl EX_INTERNAL|0x207 + call EXCEPTION + +sqrt_stage_3_no_error: +#endif PARANOID + + movl FPU_accum_2,%edx + movl FPU_accum_1,%eax + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* prepare to round the result */ + + addl %ecx,%edi + adcl $0,%esi + + jmp sqrt_stage_3_finished + +sqrt_stage_3_positive: + movl FPU_accum_2,%edx + movl FPU_accum_1,%eax + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* prepare to round the result */ + + notl %eax /* Negate the correction term */ + notl %ecx + addl $1,%eax + adcl $0,%ecx /* carry here ==> correction == 0 */ + adcl $0xffffffff,%esi + + addl %ecx,%edi + adcl $0,%esi + +sqrt_stage_3_finished: + +/* + * The result in %esi:%edi:%esi should be good to about 90 bits here, + * and the rounding information here does not have sufficient accuracy + * in a few rare cases. + */ + cmpl $0xffffffe0,%eax + ja sqrt_near_exact_x + + cmpl $0x00000020,%eax + jb sqrt_near_exact + + cmpl $0x7fffffe0,%eax + jb sqrt_round_result + + cmpl $0x80000020,%eax + jb sqrt_get_more_precision + +sqrt_round_result: +/* Set up for rounding operations */ + movl %eax,%edx + movl %esi,%eax + movl %edi,%ebx + movl PARAM1,%edi + movl EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ + movl PARAM2,%ecx + jmp fpu_reg_round_sqrt + + +sqrt_near_exact_x: +/* First, the estimate must be rounded up. */ + addl $1,%edi + adcl $0,%esi + +sqrt_near_exact: +/* + * This is an easy case because x^1/2 is monotonic. + * We need just find the square of our estimate, compare it + * with the argument, and deduce whether our estimate is + * above, below, or exact. We use the fact that the estimate + * is known to be accurate to about 90 bits. + */ + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,%ebx /* 2nd ls word of square */ + movl %eax,%ecx /* ls word of square */ + + movl %edi,%eax + mull %esi + addl %eax,%ebx + addl %eax,%ebx + +#ifdef PARANOID + cmp $0xffffffb0,%ebx + jb sqrt_near_exact_ok + + cmp $0x00000050,%ebx + ja sqrt_near_exact_ok + + pushl EX_INTERNAL|0x214 + call EXCEPTION + +sqrt_near_exact_ok: +#endif PARANOID + + or %ebx,%ebx + js sqrt_near_exact_small + + jnz sqrt_near_exact_large + + or %ebx,%edx + jnz sqrt_near_exact_large + +/* Our estimate is exactly the right answer */ + xorl %eax,%eax + jmp sqrt_round_result + +sqrt_near_exact_small: +/* Our estimate is too small */ + movl $0x000000ff,%eax + jmp sqrt_round_result + +sqrt_near_exact_large: +/* Our estimate is too large, we need to decrement it */ + subl $1,%edi + sbbl $0,%esi + movl $0xffffff00,%eax + jmp sqrt_round_result + + +sqrt_get_more_precision: +/* This case is almost the same as the above, except we start + with an extra bit of precision in the estimate. */ + stc /* The extra bit. */ + rcll $1,%edi /* Shift the estimate left one bit */ + rcll $1,%esi + + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,%ebx /* 2nd ls word of square */ + movl %eax,%ecx /* ls word of square */ + + movl %edi,%eax + mull %esi + addl %eax,%ebx + addl %eax,%ebx + +/* Put our estimate back to its original value */ + stc /* The ms bit. */ + rcrl $1,%esi /* Shift the estimate left one bit */ + rcrl $1,%edi + +#ifdef PARANOID + cmp $0xffffff60,%ebx + jb sqrt_more_prec_ok + + cmp $0x000000a0,%ebx + ja sqrt_more_prec_ok + + pushl EX_INTERNAL|0x215 + call EXCEPTION + +sqrt_more_prec_ok: +#endif PARANOID + + or %ebx,%ebx + js sqrt_more_prec_small + + jnz sqrt_more_prec_large + + or %ebx,%ecx + jnz sqrt_more_prec_large + +/* Our estimate is exactly the right answer */ + movl $0x80000000,%eax + jmp sqrt_round_result + +sqrt_more_prec_small: +/* Our estimate is too small */ + movl $0x800000ff,%eax + jmp sqrt_round_result + +sqrt_more_prec_large: +/* Our estimate is too large */ + movl $0x7fffff00,%eax + jmp sqrt_round_result |