From 33263fc5f9ac8e8cb2b22d06af3ce5ac1dd815e4 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 4 Feb 2000 07:40:19 +0000 Subject: Merge with Linux 2.3.32. --- arch/alpha/math-emu/Makefile | 10 +- arch/alpha/math-emu/fp-emul.c | 354 ---------- arch/alpha/math-emu/fp-emul.h | 10 - arch/alpha/math-emu/ieee-math.c | 1382 --------------------------------------- arch/alpha/math-emu/ieee-math.h | 54 -- arch/alpha/math-emu/math.c | 447 +++++++++++++ arch/alpha/math-emu/sfp-util.h | 40 ++ 7 files changed, 495 insertions(+), 1802 deletions(-) delete mode 100644 arch/alpha/math-emu/fp-emul.c delete mode 100644 arch/alpha/math-emu/fp-emul.h delete mode 100644 arch/alpha/math-emu/ieee-math.c delete mode 100644 arch/alpha/math-emu/ieee-math.h create mode 100644 arch/alpha/math-emu/math.c create mode 100644 arch/alpha/math-emu/sfp-util.h (limited to 'arch/alpha/math-emu') diff --git a/arch/alpha/math-emu/Makefile b/arch/alpha/math-emu/Makefile index 6ea85465f..b5fc37765 100644 --- a/arch/alpha/math-emu/Makefile +++ b/arch/alpha/math-emu/Makefile @@ -1,9 +1,15 @@ # -# Makefile for math-emulator files... +# Makefile for the FPU instruction emulation. # +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... O_TARGET := math-emu.o -O_OBJS := fp-emul.o ieee-math.o +O_OBJS := math.o +CFLAGS += -I. 
-I$(TOPDIR)/include/math-emu -w ifeq ($(CONFIG_MATHEMU),m) M_OBJS := $(O_TARGET) diff --git a/arch/alpha/math-emu/fp-emul.c b/arch/alpha/math-emu/fp-emul.c deleted file mode 100644 index c2ebbbb60..000000000 --- a/arch/alpha/math-emu/fp-emul.c +++ /dev/null @@ -1,354 +0,0 @@ -#include -#include -#include -#include - -#include - -#include "ieee-math.h" - -#define OPC_PAL 0x00 - -#define OPC_INTA 0x10 -#define OPC_INTL 0x11 -#define OPC_INTS 0x12 -#define OPC_INTM 0x13 -#define OPC_FLTC 0x14 -#define OPC_FLTV 0x15 -#define OPC_FLTI 0x16 -#define OPC_FLTL 0x17 - -#define OPC_MISC 0x18 - -#define OPC_JSR 0x1a - -#define OP_FUN(OP,FUN) ((OP << 26) | (FUN << 5)) - -/* - * "Base" function codes for the FLTI-class instructions. - * Note that in most cases these actually correspond to the "chopped" - * form of the instruction. Not to worry---we extract the qualifier - * bits separately and deal with them separately. Notice that base - * function code 0x2c is used for both CVTTS and CVTST. The other bits - * in the function code are used to distinguish the two. 
- */ -#define FLTI_FUNC_ADDS OP_FUN(OPC_FLTI, 0x000) -#define FLTI_FUNC_ADDT OP_FUN(OPC_FLTI, 0x020) -#define FLTI_FUNC_CMPTEQ OP_FUN(OPC_FLTI, 0x025) -#define FLTI_FUNC_CMPTLT OP_FUN(OPC_FLTI, 0x026) -#define FLTI_FUNC_CMPTLE OP_FUN(OPC_FLTI, 0x027) -#define FLTI_FUNC_CMPTUN OP_FUN(OPC_FLTI, 0x024) -#define FLTI_FUNC_CVTTS_or_CVTST OP_FUN(OPC_FLTI, 0x02c) -#define FLTI_FUNC_CVTTQ OP_FUN(OPC_FLTI, 0x02f) -#define FLTI_FUNC_CVTQS OP_FUN(OPC_FLTI, 0x03c) -#define FLTI_FUNC_CVTQT OP_FUN(OPC_FLTI, 0x03e) -#define FLTI_FUNC_DIVS OP_FUN(OPC_FLTI, 0x003) -#define FLTI_FUNC_DIVT OP_FUN(OPC_FLTI, 0x023) -#define FLTI_FUNC_MULS OP_FUN(OPC_FLTI, 0x002) -#define FLTI_FUNC_MULT OP_FUN(OPC_FLTI, 0x022) -#define FLTI_FUNC_SUBS OP_FUN(OPC_FLTI, 0x001) -#define FLTI_FUNC_SUBT OP_FUN(OPC_FLTI, 0x021) - -#define FLTC_FUNC_SQRTS OP_FUN(OPC_FLTC, 0x00B) -#define FLTC_FUNC_SQRTT OP_FUN(OPC_FLTC, 0x02B) - -#define FLTL_FUNC_CVTQL OP_FUN(OPC_FLTL, 0x030) - -#define MISC_TRAPB 0x0000 -#define MISC_EXCB 0x0400 - -extern unsigned long alpha_read_fp_reg (unsigned long reg); -extern void alpha_write_fp_reg (unsigned long reg, unsigned long val); - - -#ifdef MODULE - -MODULE_DESCRIPTION("FP Software completion module"); - -extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long); -extern long (*alpha_fp_emul) (unsigned long pc); - -static long (*save_emul_imprecise)(struct pt_regs *, unsigned long); -static long (*save_emul) (unsigned long pc); - -long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long); -long do_alpha_fp_emul(unsigned long); - -int init_module(void) -{ - save_emul_imprecise = alpha_fp_emul_imprecise; - save_emul = alpha_fp_emul; - alpha_fp_emul_imprecise = do_alpha_fp_emul_imprecise; - alpha_fp_emul = do_alpha_fp_emul; - return 0; -} - -void cleanup_module(void) -{ - alpha_fp_emul_imprecise = save_emul_imprecise; - alpha_fp_emul = save_emul; -} - -#undef alpha_fp_emul_imprecise -#define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise -#undef 
alpha_fp_emul -#define alpha_fp_emul do_alpha_fp_emul - -#endif /* MODULE */ - -/* - * Emulate the floating point instruction at address PC. Returns 0 if - * emulation fails. Notice that the kernel does not and cannot use FP - * regs. This is good because it means that instead of - * saving/restoring all fp regs, we simply stick the result of the - * operation into the appropriate register. - */ -long -alpha_fp_emul (unsigned long pc) -{ - unsigned long op_fun, fa, fb, fc, func, mode; - unsigned long fpcw = current->thread.flags; - unsigned long va, vb, vc, res, fpcr; - __u32 insn; - - MOD_INC_USE_COUNT; - - get_user(insn, (__u32*)pc); - fc = (insn >> 0) & 0x1f; /* destination register */ - fb = (insn >> 16) & 0x1f; - fa = (insn >> 21) & 0x1f; - func = (insn >> 5) & 0x7ff; - mode = (insn >> 5) & 0xc0; - op_fun = insn & OP_FUN(0x3f, 0x3f); - - va = alpha_read_fp_reg(fa); - vb = alpha_read_fp_reg(fb); - fpcr = rdfpcr(); - - /* - * Try the operation in software. First, obtain the rounding - * mode... - */ - if (mode == 0xc0) { - /* dynamic---get rounding mode from fpcr: */ - mode = ((fpcr & FPCR_DYN_MASK) >> FPCR_DYN_SHIFT) << ROUND_SHIFT; - } - mode |= (fpcw & IEEE_TRAP_ENABLE_MASK); - - if ((IEEE_TRAP_ENABLE_MASK & 0xc0)) { - extern int something_is_wrong (void); - something_is_wrong(); - } - - switch (op_fun) { - case FLTI_FUNC_CMPTEQ: - res = ieee_CMPTEQ(va, vb, &vc); - break; - - case FLTI_FUNC_CMPTLT: - res = ieee_CMPTLT(va, vb, &vc); - break; - - case FLTI_FUNC_CMPTLE: - res = ieee_CMPTLE(va, vb, &vc); - break; - - case FLTI_FUNC_CMPTUN: - res = ieee_CMPTUN(va, vb, &vc); - break; - - case FLTL_FUNC_CVTQL: - /* - * Notice: We can get here only due to an integer - * overflow. Such overflows are reported as invalid - * ops. We return the result the hw would have - * computed. 
- */ - vc = ((vb & 0xc0000000) << 32 | /* sign and msb */ - (vb & 0x3fffffff) << 29); /* rest of the integer */ - res = FPCR_INV; - break; - - case FLTI_FUNC_CVTQS: - res = ieee_CVTQS(mode, vb, &vc); - break; - - case FLTI_FUNC_CVTQT: - res = ieee_CVTQT(mode, vb, &vc); - break; - - case FLTI_FUNC_CVTTS_or_CVTST: - if (func == 0x6ac) { - /* - * 0x2ac is also CVTST, but if the /S - * qualifier isn't set, we wouldn't be here in - * the first place... - */ - res = ieee_CVTST(mode, vb, &vc); - } else { - res = ieee_CVTTS(mode, vb, &vc); - } - break; - - case FLTI_FUNC_DIVS: - res = ieee_DIVS(mode, va, vb, &vc); - break; - - case FLTI_FUNC_DIVT: - res = ieee_DIVT(mode, va, vb, &vc); - break; - - case FLTI_FUNC_MULS: - res = ieee_MULS(mode, va, vb, &vc); - break; - - case FLTI_FUNC_MULT: - res = ieee_MULT(mode, va, vb, &vc); - break; - - case FLTI_FUNC_SUBS: - res = ieee_SUBS(mode, va, vb, &vc); - break; - - case FLTI_FUNC_SUBT: - res = ieee_SUBT(mode, va, vb, &vc); - break; - - case FLTI_FUNC_ADDS: - res = ieee_ADDS(mode, va, vb, &vc); - break; - - case FLTI_FUNC_ADDT: - res = ieee_ADDT(mode, va, vb, &vc); - break; - - case FLTI_FUNC_CVTTQ: - res = ieee_CVTTQ(mode, vb, &vc); - break; - - case FLTC_FUNC_SQRTS: - res = ieee_SQRTS(mode, vb, &vc); - break; - - case FLTC_FUNC_SQRTT: - res = ieee_SQRTT(mode, vb, &vc); - break; - - default: - printk("alpha_fp_emul: unexpected function code %#lx at %#lx\n", - func & 0x3f, pc); - MOD_DEC_USE_COUNT; - return 0; - } - - /* - * Take the appropriate action for each possible - * floating-point result: - * - * - Set the appropriate bits in the FPCR - * - If the specified exception is enabled in the FPCR, - * return. The caller (entArith) will dispatch - * the appropriate signal to the translated program. - * - * In addition, properly track the exception state in software - * as described in the Alpha Architectre Handbook section 4.7.7.3. - */ - if (res) { - /* Record exceptions in software control word. 
*/ - current->thread.flags = fpcw |= res >> 35; - - /* Update hardware control register */ - fpcr &= (~FPCR_MASK | FPCR_DYN_MASK); - fpcr |= ieee_swcr_to_fpcr(fpcw); - wrfpcr(fpcr); - - /* Do we generate a signal? */ - if (res >> 51 & fpcw & IEEE_TRAP_ENABLE_MASK) { - MOD_DEC_USE_COUNT; - return 0; - } - } - - /* - * Whoo-kay... we got this far, and we're not generating a signal - * to the translated program. All that remains is to write the - * result: - */ - alpha_write_fp_reg(fc, vc); - - MOD_DEC_USE_COUNT; - return 1; -} - - -long -alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) -{ - unsigned long trigger_pc = regs->pc - 4; - unsigned long insn, opcode, rc; - - MOD_INC_USE_COUNT; - - /* - * Turn off the bits corresponding to registers that are the - * target of instructions that set bits in the exception - * summary register. We have some slack doing this because a - * register that is the target of a trapping instruction can - * be written at most once in the trap shadow. - * - * Branches, jumps, TRAPBs, EXCBs and calls to PALcode all - * bound the trap shadow, so we need not look any further than - * up to the first occurrence of such an instruction. - */ - while (write_mask) { - get_user(insn, (__u32*)(trigger_pc)); - opcode = insn >> 26; - rc = insn & 0x1f; - - switch (opcode) { - case OPC_PAL: - case OPC_JSR: - case 0x30 ... 
0x3f: /* branches */ - MOD_DEC_USE_COUNT; - return 0; - - case OPC_MISC: - switch (insn & 0xffff) { - case MISC_TRAPB: - case MISC_EXCB: - MOD_DEC_USE_COUNT; - return 0; - - default: - break; - } - break; - - case OPC_INTA: - case OPC_INTL: - case OPC_INTS: - case OPC_INTM: - write_mask &= ~(1UL << rc); - break; - - case OPC_FLTC: - case OPC_FLTV: - case OPC_FLTI: - case OPC_FLTL: - write_mask &= ~(1UL << (rc + 32)); - break; - } - if (!write_mask) { - if (alpha_fp_emul(trigger_pc)) { - /* re-execute insns in trap-shadow: */ - regs->pc = trigger_pc + 4; - MOD_DEC_USE_COUNT; - return 1; - } - break; - } - trigger_pc -= 4; - } - MOD_DEC_USE_COUNT; - return 0; -} diff --git a/arch/alpha/math-emu/fp-emul.h b/arch/alpha/math-emu/fp-emul.h deleted file mode 100644 index 12965fedc..000000000 --- a/arch/alpha/math-emu/fp-emul.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * These defines correspond to the dynamic rounding mode bits in the - * Floating Point Control Register. They also happen to correspond to - * the instruction encodings except that 0x03 signifies dynamic - * rounding mode in that case. - */ -#define ROUND_CHOP 0x00 /* chopped (aka round towards zero) */ -#define ROUND_NINF 0x01 /* round towards negative infinity */ -#define ROUND_NEAR 0x02 /* round towards nearest number */ -#define ROUND_PINF 0x03 /* round towards positive infinity */ diff --git a/arch/alpha/math-emu/ieee-math.c b/arch/alpha/math-emu/ieee-math.c deleted file mode 100644 index 9a86b1048..000000000 --- a/arch/alpha/math-emu/ieee-math.c +++ /dev/null @@ -1,1382 +0,0 @@ -/* - * ieee-math.c - IEEE floating point emulation code - * Copyright (C) 1989,1990,1991,1995 by - * Digital Equipment Corporation, Maynard, Massachusetts. - * - * Heavily modified for Linux/Alpha. Changes are Copyright (c) 1995 - * by David Mosberger (davidm@azstarnet.com). - * - * This file may be redistributed according to the terms of the - * GNU General Public License. - */ -/* - * The original code did not have any comments. 
I have created many - * comments as I fix the bugs in the code. My comments are based on - * my observation and interpretation of the code. If the original - * author would have spend a few minutes to comment the code, we would - * never had a problem of misinterpretation. -HA - * - * This code could probably be a lot more optimized (especially the - * division routine). However, my foremost concern was to get the - * IEEE behavior right. Performance is less critical as these - * functions are used on exceptional numbers only (well, assuming you - * don't turn on the "trap on inexact"...). - */ -#include -#include "ieee-math.h" - -#define STICKY_S 0x20000000 /* both in longword 0 of fraction */ -#define STICKY_T 1 - -/* - * Careful: order matters here! - */ -enum { - NaN, QNaN, INFTY, ZERO, DENORM, NORMAL -}; - -enum { - SINGLE, DOUBLE -}; - -typedef unsigned long fpclass_t; - -#define IEEE_TMAX 0x7fefffffffffffff -#define IEEE_SMAX 0x47efffffe0000000 -#define IEEE_SNaN 0xfff00000000f0000 -#define IEEE_QNaN 0xfff8000000000000 -#define IEEE_PINF 0x7ff0000000000000 -#define IEEE_NINF 0xfff0000000000000 - - -/* - * The memory format of S floating point numbers differs from the - * register format. In the following, the bitnumbers above the - * diagram below give the memory format while the numbers below give - * the register format. 
- * - * 31 30 23 22 0 - * +-----------------------------------------------+ - * S | s | exp | fraction | - * +-----------------------------------------------+ - * 63 62 52 51 29 - * - * For T floating point numbers, the register and memory formats - * match: - * - * +-------------------------------------------------------------------+ - * T | s | exp | frac | tion | - * +-------------------------------------------------------------------+ - * 63 62 52 51 32 31 0 - */ -typedef struct { - unsigned long f[2]; /* bit 55 in f[0] is the factor of 2^0*/ - int s; /* 1 bit sign (0 for +, 1 for -) */ - int e; /* 16 bit signed exponent */ -} EXTENDED; - - -/* - * Return the sign of a Q integer, S or T fp number in the register - * format. - */ -static inline int -sign (unsigned long a) -{ - if ((long) a < 0) - return -1; - else - return 1; -} - - -static inline long -cmp128 (const long a[2], const long b[2]) -{ - if (a[1] < b[1]) return -1; - if (a[1] > b[1]) return 1; - return a[0] - b[0]; -} - - -static inline void -sll128 (unsigned long a[2]) -{ - a[1] = (a[1] << 1) | (a[0] >> 63); - a[0] <<= 1; -} - - -static inline void -srl128 (unsigned long a[2]) -{ - a[0] = (a[0] >> 1) | (a[1] << 63); - a[1] >>= 1; -} - - -static inline void -add128 (const unsigned long a[2], const unsigned long b[2], unsigned long c[2]) -{ - unsigned long carry = a[0] > (0xffffffffffffffff - b[0]); - - c[0] = a[0] + b[0]; - c[1] = a[1] + b[1] + carry; -} - - -static inline void -sub128 (const unsigned long a[2], const unsigned long b[2], unsigned long c[2]) -{ - unsigned long borrow = a[0] < b[0]; - - c[0] = a[0] - b[0]; - c[1] = a[1] - b[1] - borrow; -} - - -static inline void -mul64 (const unsigned long a, const unsigned long b, unsigned long c[2]) -{ - c[0] = a * b; - asm ("umulh %1,%2,%0" : "=r"(c[1]) : "r"(a), "r"(b)); -} - - -static void -div128 (unsigned long a[2], unsigned long b[2], unsigned long c[2]) -{ - unsigned long mask[2] = {1, 0}; - - /* - * Shift b until either the sign bit is set 
or until it is at - * least as big as the dividend: - */ - while (cmp128(b, a) < 0 && sign(b[1]) >= 0) { - sll128(b); - sll128(mask); - } - c[0] = c[1] = 0; - do { - if (cmp128(a, b) >= 0) { - sub128(a, b, a); - add128(mask, c, c); - } - srl128(mask); - srl128(b); - } while (mask[0] || mask[1]); -} - - -static void -normalize (EXTENDED *a) -{ - if (!a->f[0] && !a->f[1]) - return; /* zero fraction, unnormalizable... */ - /* - * In "extended" format, the "1" in "1.f" is explicit; it is - * in bit 55 of f[0], and the decimal point is understood to - * be between bit 55 and bit 54. To normalize, shift the - * fraction until we have a "1" in bit 55. - */ - if ((a->f[0] & 0xff00000000000000) != 0 || a->f[1] != 0) { - /* - * Mantissa is greater than 1.0: - */ - while ((a->f[0] & 0xff80000000000000) != 0x0080000000000000 || - a->f[1] != 0) - { - unsigned long sticky; - - ++a->e; - sticky = a->f[0] & 1; - srl128(a->f); - a->f[0] |= sticky; - } - return; - } - - if (!(a->f[0] & 0x0080000000000000)) { - /* - * Mantissa is less than 1.0: - */ - while (!(a->f[0] & 0x0080000000000000)) { - --a->e; - a->f[0] <<= 1; - } - return; - } -} - - -static inline fpclass_t -ieee_fpclass (unsigned long a) -{ - unsigned long exp, fract; - - exp = (a >> 52) & 0x7ff; /* 11 bits of exponent */ - fract = a & 0x000fffffffffffff; /* 52 bits of fraction */ - if (exp == 0) { - if (fract == 0) - return ZERO; - return DENORM; - } - if (exp == 0x7ff) { - if (fract == 0) - return INFTY; - if (((fract >> 51) & 1) != 0) - return QNaN; - return NaN; - } - return NORMAL; -} - - -/* - * Translate S/T fp number in register format into extended format. - */ -static fpclass_t -extend_ieee (unsigned long a, EXTENDED *b, int prec) -{ - fpclass_t result_kind; - - b->s = a >> 63; - b->e = ((a >> 52) & 0x7ff) - 0x3ff; /* remove bias */ - b->f[1] = 0; - /* - * We shift f[1] left three bits so that the higher order bits - * of the fraction will reside in bits 55 through 0 of f[0]. 
- */ - b->f[0] = (a & 0x000fffffffffffff) << 3; - result_kind = ieee_fpclass(a); - if (result_kind == NORMAL) { - /* set implied 1. bit: */ - b->f[0] |= 1UL << 55; - } else if (result_kind == DENORM) { - if (prec == SINGLE) - b->e = -126; - else - b->e = -1022; - } - return result_kind; -} - - -/* - * INPUT PARAMETERS: - * a a number in EXTENDED format to be converted to - * s-floating format. - * f rounding mode and exception enable bits. - * OUTPUT PARAMETERS: - * b will contain the s-floating number that "a" was - * converted to (in register format). - */ -static unsigned long -make_s_ieee (long f, EXTENDED *a, unsigned long *b) -{ - unsigned long res, sticky; - - if (!a->e && !a->f[0] && !a->f[1]) { - *b = (unsigned long) a->s << 63; /* return +/-0 */ - return 0; - } - - normalize(a); - res = 0; - - if (a->e < -0x7e) { - res = FPCR_INE; - if (f & IEEE_TRAP_ENABLE_UNF) { - res |= FPCR_UNF; - a->e += 0xc0; /* scale up result by 2^alpha */ - } else { - /* try making denormalized number: */ - while (a->e < -0x7e) { - ++a->e; - sticky = a->f[0] & 1; - srl128(a->f); - if (!a->f[0] && !a->f[0]) { - /* underflow: replace with exact 0 */ - res |= FPCR_UNF; - break; - } - a->f[0] |= sticky; - } - a->e = -0x3ff; - } - } - if (a->e >= 0x80) { - res = FPCR_OVF | FPCR_INE; - if (f & IEEE_TRAP_ENABLE_OVF) { - a->e -= 0xc0; /* scale down result by 2^alpha */ - } else { - /* - * Overflow without trap enabled, substitute - * result according to rounding mode: - */ - switch (RM(f)) { - case ROUND_NEAR: - *b = IEEE_PINF; - break; - - case ROUND_CHOP: - *b = IEEE_SMAX; - break; - - case ROUND_NINF: - if (a->s) { - *b = IEEE_PINF; - } else { - *b = IEEE_SMAX; - } - break; - - case ROUND_PINF: - if (a->s) { - *b = IEEE_SMAX; - } else { - *b = IEEE_PINF; - } - break; - } - *b |= ((unsigned long) a->s << 63); - return res; - } - } - - *b = (((unsigned long) a->s << 63) | - (((unsigned long) a->e + 0x3ff) << 52) | - ((a->f[0] >> 3) & 0x000fffffe0000000)); - return res; -} - - -static 
unsigned long -make_t_ieee (long f, EXTENDED *a, unsigned long *b) -{ - unsigned long res, sticky; - - if (!a->e && !a->f[0] && !a->f[1]) { - *b = (unsigned long) a->s << 63; /* return +/-0 */ - return 0; - } - - normalize(a); - res = 0; - if (a->e < -0x3fe) { - res = FPCR_INE; - if (f & IEEE_TRAP_ENABLE_UNF) { - res |= FPCR_UNF; - a->e += 0x600; - } else { - /* try making denormalized number: */ - while (a->e < -0x3fe) { - ++a->e; - sticky = a->f[0] & 1; - srl128(a->f); - if (!a->f[0] && !a->f[0]) { - /* underflow: replace with exact 0 */ - res |= FPCR_UNF; - break; - } - a->f[0] |= sticky; - } - a->e = -0x3ff; - } - } - if (a->e >= 0x3ff) { - res = FPCR_OVF | FPCR_INE; - if (f & IEEE_TRAP_ENABLE_OVF) { - a->e -= 0x600; /* scale down result by 2^alpha */ - } else { - /* - * Overflow without trap enabled, substitute - * result according to rounding mode: - */ - switch (RM(f)) { - case ROUND_NEAR: - *b = IEEE_PINF; - break; - - case ROUND_CHOP: - *b = IEEE_TMAX; - break; - - case ROUND_NINF: - if (a->s) { - *b = IEEE_PINF; - } else { - *b = IEEE_TMAX; - } - break; - - case ROUND_PINF: - if (a->s) { - *b = IEEE_TMAX; - } else { - *b = IEEE_PINF; - } - break; - } - *b |= ((unsigned long) a->s << 63); - return res; - } - } - *b = (((unsigned long) a->s << 63) | - (((unsigned long) a->e + 0x3ff) << 52) | - ((a->f[0] >> 3) & 0x000fffffffffffff)); - return res; -} - - -/* - * INPUT PARAMETERS: - * a EXTENDED format number to be rounded. - * rm integer with value ROUND_NEAR, ROUND_CHOP, etc. - * indicates how "a" should be rounded to produce "b". - * OUTPUT PARAMETERS: - * b s-floating number produced by rounding "a". - * RETURN VALUE: - * if no errors occurred, will be zero. Else will contain flags - * like FPCR_INE_OP, etc. 
- */ -static unsigned long -round_s_ieee (int f, EXTENDED *a, unsigned long *b) -{ - unsigned long diff1, diff2, res = 0; - EXTENDED z1, z2; - - if (!(a->f[0] & 0xffffffff)) { - return make_s_ieee(f, a, b); /* no rounding error */ - } - - /* - * z1 and z2 are the S-floating numbers with the next smaller/greater - * magnitude than a, respectively. - */ - z1.s = z2.s = a->s; - z1.e = z2.e = a->e; - z1.f[0] = z2.f[0] = a->f[0] & 0xffffffff00000000; - z1.f[1] = z2.f[1] = 0; - z2.f[0] += 0x100000000; /* next bigger S float number */ - - switch (RM(f)) { - case ROUND_NEAR: - diff1 = a->f[0] - z1.f[0]; - diff2 = z2.f[0] - a->f[0]; - if (diff1 > diff2) - res = make_s_ieee(f, &z2, b); - else if (diff2 > diff1) - res = make_s_ieee(f, &z1, b); - else - /* equal distance: round towards even */ - if (z1.f[0] & 0x100000000) - res = make_s_ieee(f, &z2, b); - else - res = make_s_ieee(f, &z1, b); - break; - - case ROUND_CHOP: - res = make_s_ieee(f, &z1, b); - break; - - case ROUND_PINF: - if (a->s) { - res = make_s_ieee(f, &z1, b); - } else { - res = make_s_ieee(f, &z2, b); - } - break; - - case ROUND_NINF: - if (a->s) { - res = make_s_ieee(f, &z2, b); - } else { - res = make_s_ieee(f, &z1, b); - } - break; - } - return FPCR_INE | res; -} - - -static unsigned long -round_t_ieee (int f, EXTENDED *a, unsigned long *b) -{ - unsigned long diff1, diff2, res; - EXTENDED z1, z2; - - if (!(a->f[0] & 0x7)) { - /* no rounding error */ - return make_t_ieee(f, a, b); - } - - z1.s = z2.s = a->s; - z1.e = z2.e = a->e; - z1.f[0] = z2.f[0] = a->f[0] & ~0x7; - z1.f[1] = z2.f[1] = 0; - z2.f[0] += (1 << 3); - - res = 0; - switch (RM(f)) { - case ROUND_NEAR: - diff1 = a->f[0] - z1.f[0]; - diff2 = z2.f[0] - a->f[0]; - if (diff1 > diff2) - res = make_t_ieee(f, &z2, b); - else if (diff2 > diff1) - res = make_t_ieee(f, &z1, b); - else - /* equal distance: round towards even */ - if (z1.f[0] & (1 << 3)) - res = make_t_ieee(f, &z2, b); - else - res = make_t_ieee(f, &z1, b); - break; - - case ROUND_CHOP: - 
res = make_t_ieee(f, &z1, b); - break; - - case ROUND_PINF: - if (a->s) { - res = make_t_ieee(f, &z1, b); - } else { - res = make_t_ieee(f, &z2, b); - } - break; - - case ROUND_NINF: - if (a->s) { - res = make_t_ieee(f, &z2, b); - } else { - res = make_t_ieee(f, &z1, b); - } - break; - } - return FPCR_INE | res; -} - - -static fpclass_t -add_kernel_ieee (EXTENDED *op_a, EXTENDED *op_b, EXTENDED *op_c) -{ - unsigned long mask, fa, fb, fc; - int diff; - - diff = op_a->e - op_b->e; - fa = op_a->f[0]; - fb = op_b->f[0]; - if (diff < 0) { - diff = -diff; - op_c->e = op_b->e; - mask = (1UL << diff) - 1; - fa >>= diff; - if (op_a->f[0] & mask) { - fa |= 1; /* set sticky bit */ - } - } else { - op_c->e = op_a->e; - mask = (1UL << diff) - 1; - fb >>= diff; - if (op_b->f[0] & mask) { - fb |= 1; /* set sticky bit */ - } - } - if (op_a->s) - fa = -fa; - if (op_b->s) - fb = -fb; - fc = fa + fb; - op_c->f[1] = 0; - op_c->s = fc >> 63; - if (op_c->s) { - fc = -fc; - } - op_c->f[0] = fc; - normalize(op_c); - return 0; -} - - -/* - * converts s-floating "a" to t-floating "b". - * - * INPUT PARAMETERS: - * a a s-floating number to be converted - * f the rounding mode (ROUND_NEAR, etc. ) - * OUTPUT PARAMETERS: - * b the t-floating number that "a" is converted to. - * RETURN VALUE: - * error flags - i.e., zero if no errors occurred, - * FPCR_INV if invalid operation occurred, etc. - */ -unsigned long -ieee_CVTST (int f, unsigned long a, unsigned long *b) -{ - EXTENDED temp; - fpclass_t a_type; - - a_type = extend_ieee(a, &temp, SINGLE); - if (a_type >= NaN && a_type <= INFTY) { - *b = a; - if (a_type == NaN) { - *b |= (1UL << 51); /* turn SNaN into QNaN */ - return FPCR_INV; - } - return 0; - } - return round_t_ieee(f, &temp, b); -} - - -/* - * converts t-floating "a" to s-floating "b". - * - * INPUT PARAMETERS: - * a a t-floating number to be converted - * f the rounding mode (ROUND_NEAR, etc. ) - * OUTPUT PARAMETERS: - * b the s-floating number that "a" is converted to. 
- * RETURN VALUE: - * error flags - i.e., zero if no errors occurred, - * FPCR_INV if invalid operation occurred, etc. - */ -unsigned long -ieee_CVTTS (int f, unsigned long a, unsigned long *b) -{ - EXTENDED temp; - fpclass_t a_type; - - a_type = extend_ieee(a, &temp, DOUBLE); - if (a_type >= NaN && a_type <= INFTY) { - *b = a; - if (a_type == NaN) { - *b |= (1UL << 51); /* turn SNaN into QNaN */ - return FPCR_INV; - } - return 0; - } - return round_s_ieee(f, &temp, b); -} - - -/* - * converts q-format (64-bit integer) "a" to s-floating "b". - * - * INPUT PARAMETERS: - * a an 64-bit integer to be converted. - * f the rounding mode (ROUND_NEAR, etc. ) - * OUTPUT PARAMETERS: - * b the s-floating number "a" is converted to. - * RETURN VALUE: - * error flags - i.e., zero if no errors occurred, - * FPCR_INV if invalid operation occurred, etc. - */ -unsigned long -ieee_CVTQS (int f, unsigned long a, unsigned long *b) -{ - EXTENDED op_b; - - op_b.s = 0; - op_b.f[0] = a; - op_b.f[1] = 0; - if (sign(a) < 0) { - op_b.s = 1; - op_b.f[0] = -a; - } - op_b.e = 55; - normalize(&op_b); - return round_s_ieee(f, &op_b, b); -} - - -/* - * converts 64-bit integer "a" to t-floating "b". - * - * INPUT PARAMETERS: - * a a 64-bit integer to be converted. - * f the rounding mode (ROUND_NEAR, etc.) - * OUTPUT PARAMETERS: - * b the t-floating number "a" is converted to. - * RETURN VALUE: - * error flags - i.e., zero if no errors occurred, - * FPCR_INV if invalid operation occurred, etc. - */ -unsigned long -ieee_CVTQT (int f, long a, unsigned long *b) -{ - EXTENDED op_b; - - if (a != 0) { - op_b.s = (a < 0 ? 1 : 0); - op_b.f[0] = (a < 0 ? -a : a); - op_b.f[1] = 0; - op_b.e = 55; - normalize(&op_b); - return round_t_ieee(f, &op_b, b); - } else { - *b = 0; - return 0; - } -} - - -/* - * converts t-floating "a" to 64-bit integer (q-format) "b". - * - * INPUT PARAMETERS: - * a a t-floating number to be converted. - * f the rounding mode (ROUND_NEAR, etc. 
) - * OUTPUT PARAMETERS: - * b the 64-bit integer "a" is converted to. - * RETURN VALUE: - * error flags - i.e., zero if no errors occurred, - * FPCR_INV if invalid operation occurred, etc. - */ -unsigned long -ieee_CVTTQ (int f, unsigned long a, unsigned long *pb) -{ - unsigned int midway; - unsigned long ov, uv, res, b; - fpclass_t a_type; - EXTENDED temp; - - a_type = extend_ieee(a, &temp, DOUBLE); - - b = 0x7fffffffffffffff; - res = FPCR_INV; - if (a_type == NaN || a_type == INFTY) - goto out; - - res = 0; - if (a_type == QNaN) - goto out; - - if (temp.e > 0) { - ov = 0; - while (temp.e > 0) { - --temp.e; - ov |= temp.f[1] >> 63; - sll128(temp.f); - } - if (ov || (temp.f[1] & 0xffc0000000000000)) - res |= FPCR_IOV | FPCR_INE; - } - else if (temp.e < 0) { - while (temp.e < 0) { - ++temp.e; - uv = temp.f[0] & 1; /* save sticky bit */ - srl128(temp.f); - temp.f[0] |= uv; - } - } - b = (temp.f[1] << 9) | (temp.f[0] >> 55); - - /* - * Notice: the fraction is only 52 bits long. Thus, rounding - * cannot possibly result in an integer overflow. 
- */ - switch (RM(f)) { - case ROUND_NEAR: - if (temp.f[0] & 0x0040000000000000) { - midway = (temp.f[0] & 0x003fffffffffffff) == 0; - if ((midway && (temp.f[0] & 0x0080000000000000)) || - !midway) - ++b; - } - break; - - case ROUND_PINF: - b += ((temp.f[0] & 0x007fffffffffffff) != 0 && !temp.s); - break; - - case ROUND_NINF: - b += ((temp.f[0] & 0x007fffffffffffff) != 0 && temp.s); - break; - - case ROUND_CHOP: - /* no action needed */ - break; - } - if ((temp.f[0] & 0x007fffffffffffff) != 0) - res |= FPCR_INE; - - if (temp.s) { - b = -b; - } - -out: - *pb = b; - return res; -} - - -unsigned long -ieee_CMPTEQ (unsigned long a, unsigned long b, unsigned long *c) -{ - EXTENDED op_a, op_b; - fpclass_t a_type, b_type; - - *c = 0; - a_type = extend_ieee(a, &op_a, DOUBLE); - b_type = extend_ieee(b, &op_b, DOUBLE); - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - - if ((op_a.e == op_b.e && op_a.s == op_b.s && - op_a.f[0] == op_b.f[0] && op_a.f[1] == op_b.f[1]) || - (a_type == ZERO && b_type == ZERO)) - *c = 0x4000000000000000; - return 0; -} - - -unsigned long -ieee_CMPTLT (unsigned long a, unsigned long b, unsigned long *c) -{ - fpclass_t a_type, b_type; - EXTENDED op_a, op_b; - - *c = 0; - a_type = extend_ieee(a, &op_a, DOUBLE); - b_type = extend_ieee(b, &op_b, DOUBLE); - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - - if ((op_a.s == 1 && op_b.s == 0 && - (a_type != ZERO || b_type != ZERO)) || - (op_a.s == 1 && op_b.s == 1 && - (op_a.e > op_b.e || (op_a.e == op_b.e && - cmp128(op_a.f, op_b.f) > 0))) || - (op_a.s == 0 && op_b.s == 0 && - (op_a.e < op_b.e || (op_a.e == op_b.e && - cmp128(op_a.f,op_b.f) < 0)))) - *c = 0x4000000000000000; - return 0; -} - - -unsigned long -ieee_CMPTLE (unsigned long a, unsigned long b, unsigned long *c) -{ - fpclass_t a_type, b_type; - EXTENDED op_a, op_b; - - *c = 0; - a_type = extend_ieee(a, &op_a, DOUBLE); - 
b_type = extend_ieee(b, &op_b, DOUBLE); - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - - if ((a_type == ZERO && b_type == ZERO) || - (op_a.s == 1 && op_b.s == 0) || - (op_a.s == 1 && op_b.s == 1 && - (op_a.e > op_b.e || (op_a.e == op_b.e && - cmp128(op_a.f,op_b.f) >= 0))) || - (op_a.s == 0 && op_b.s == 0 && - (op_a.e < op_b.e || (op_a.e == op_b.e && - cmp128(op_a.f,op_b.f) <= 0)))) - *c = 0x4000000000000000; - return 0; -} - - -unsigned long -ieee_CMPTUN (unsigned long a, unsigned long b, unsigned long *c) -{ - fpclass_t a_type, b_type; - EXTENDED op_a, op_b; - - *c = 0x4000000000000000; - a_type = extend_ieee(a, &op_a, DOUBLE); - b_type = extend_ieee(b, &op_b, DOUBLE); - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - *c = 0; - return 0; -} - - -/* - * Add a + b = c, where a, b, and c are ieee s-floating numbers. "f" - * contains the rounding mode etc. - */ -unsigned long -ieee_ADDS (int f, unsigned long a, unsigned long b, unsigned long *c) -{ - fpclass_t a_type, b_type; - EXTENDED op_a, op_b, op_c; - - a_type = extend_ieee(a, &op_a, SINGLE); - b_type = extend_ieee(b, &op_b, SINGLE); - if ((a_type >= NaN && a_type <= INFTY) || - (b_type >= NaN && b_type <= INFTY)) - { - /* propagate NaNs according to arch. ref. 
handbook: */ - if (b_type == QNaN) - *c = b; - else if (b_type == NaN) - *c = b | (1UL << 51); - else if (a_type == QNaN) - *c = a; - else if (a_type == NaN) - *c = a | (1UL << 51); - - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - - if (a_type == INFTY && b_type == INFTY && sign(a) != sign(b)) { - *c = IEEE_QNaN; - return FPCR_INV; - } - if (a_type == INFTY) - *c = a; - else - *c = b; - return 0; - } - - add_kernel_ieee(&op_a, &op_b, &op_c); - /* special case for -0 + -0 ==> -0 */ - if (a_type == ZERO && b_type == ZERO) - op_c.s = op_a.s && op_b.s; - return round_s_ieee(f, &op_c, c); -} - - -/* - * Add a + b = c, where a, b, and c are ieee t-floating numbers. "f" - * contains the rounding mode etc. - */ -unsigned long -ieee_ADDT (int f, unsigned long a, unsigned long b, unsigned long *c) -{ - fpclass_t a_type, b_type; - EXTENDED op_a, op_b, op_c; - - a_type = extend_ieee(a, &op_a, DOUBLE); - b_type = extend_ieee(b, &op_b, DOUBLE); - if ((a_type >= NaN && a_type <= INFTY) || - (b_type >= NaN && b_type <= INFTY)) - { - /* propagate NaNs according to arch. ref. handbook: */ - if (b_type == QNaN) - *c = b; - else if (b_type == NaN) - *c = b | (1UL << 51); - else if (a_type == QNaN) - *c = a; - else if (a_type == NaN) - *c = a | (1UL << 51); - - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - - if (a_type == INFTY && b_type == INFTY && sign(a) != sign(b)) { - *c = IEEE_QNaN; - return FPCR_INV; - } - if (a_type == INFTY) - *c = a; - else - *c = b; - return 0; - } - add_kernel_ieee(&op_a, &op_b, &op_c); - /* special case for -0 + -0 ==> -0 */ - if (a_type == ZERO && b_type == ZERO) - op_c.s = op_a.s && op_b.s; - - return round_t_ieee(f, &op_c, c); -} - - -/* - * Subtract a - b = c, where a, b, and c are ieee s-floating numbers. - * "f" contains the rounding mode etc. 
- */ -unsigned long -ieee_SUBS (int f, unsigned long a, unsigned long b, unsigned long *c) -{ - fpclass_t a_type, b_type; - EXTENDED op_a, op_b, op_c; - - a_type = extend_ieee(a, &op_a, SINGLE); - b_type = extend_ieee(b, &op_b, SINGLE); - if ((a_type >= NaN && a_type <= INFTY) || - (b_type >= NaN && b_type <= INFTY)) - { - /* propagate NaNs according to arch. ref. handbook: */ - if (b_type == QNaN) - *c = b; - else if (b_type == NaN) - *c = b | (1UL << 51); - else if (a_type == QNaN) - *c = a; - else if (a_type == NaN) - *c = a | (1UL << 51); - - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - - if (a_type == INFTY && b_type == INFTY && sign(a) == sign(b)) { - *c = IEEE_QNaN; - return FPCR_INV; - } - if (a_type == INFTY) - *c = a; - else - *c = b ^ (1UL << 63); - return 0; - } - op_b.s = !op_b.s; - add_kernel_ieee(&op_a, &op_b, &op_c); - /* special case for -0 - +0 ==> -0 */ - if (a_type == ZERO && b_type == ZERO) - op_c.s = op_a.s && op_b.s; - - return round_s_ieee(f, &op_c, c); -} - - -/* - * Subtract a - b = c, where a, b, and c are ieee t-floating numbers. - * "f" contains the rounding mode etc. - */ -unsigned long -ieee_SUBT (int f, unsigned long a, unsigned long b, unsigned long *c) -{ - fpclass_t a_type, b_type; - EXTENDED op_a, op_b, op_c; - - a_type = extend_ieee(a, &op_a, DOUBLE); - b_type = extend_ieee(b, &op_b, DOUBLE); - if ((a_type >= NaN && a_type <= INFTY) || - (b_type >= NaN && b_type <= INFTY)) - { - /* propagate NaNs according to arch. ref. 
handbook: */ - if (b_type == QNaN) - *c = b; - else if (b_type == NaN) - *c = b | (1UL << 51); - else if (a_type == QNaN) - *c = a; - else if (a_type == NaN) - *c = a | (1UL << 51); - - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - - if (a_type == INFTY && b_type == INFTY && sign(a) == sign(b)) { - *c = IEEE_QNaN; - return FPCR_INV; - } - if (a_type == INFTY) - *c = a; - else - *c = b ^ (1UL << 63); - return 0; - } - op_b.s = !op_b.s; - add_kernel_ieee(&op_a, &op_b, &op_c); - /* special case for -0 - +0 ==> -0 */ - if (a_type == ZERO && b_type == ZERO) - op_c.s = op_a.s && op_b.s; - - return round_t_ieee(f, &op_c, c); -} - - -/* - * Multiply a x b = c, where a, b, and c are ieee s-floating numbers. - * "f" contains the rounding mode. - */ -unsigned long -ieee_MULS (int f, unsigned long a, unsigned long b, unsigned long *c) -{ - fpclass_t a_type, b_type; - EXTENDED op_a, op_b, op_c; - - a_type = extend_ieee(a, &op_a, SINGLE); - b_type = extend_ieee(b, &op_b, SINGLE); - if ((a_type >= NaN && a_type <= INFTY) || - (b_type >= NaN && b_type <= INFTY)) - { - /* propagate NaNs according to arch. ref. 
handbook: */ - if (b_type == QNaN) - *c = b; - else if (b_type == NaN) - *c = b | (1UL << 51); - else if (a_type == QNaN) - *c = a; - else if (a_type == NaN) - *c = a | (1UL << 51); - - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - - if ((a_type == INFTY && b_type == ZERO) || - (b_type == INFTY && a_type == ZERO)) - { - *c = IEEE_QNaN; /* return canonical QNaN */ - return FPCR_INV; - } - if (a_type == INFTY) - *c = a ^ ((b >> 63) << 63); - else if (b_type == INFTY) - *c = b ^ ((a >> 63) << 63); - else - /* either of a and b are +/-0 */ - *c = ((unsigned long) op_a.s ^ op_b.s) << 63; - return 0; - } - op_c.s = op_a.s ^ op_b.s; - op_c.e = op_a.e + op_b.e - 55; - mul64(op_a.f[0], op_b.f[0], op_c.f); - - return round_s_ieee(f, &op_c, c); -} - - -/* - * Multiply a x b = c, where a, b, and c are ieee t-floating numbers. - * "f" contains the rounding mode. - */ -unsigned long -ieee_MULT (int f, unsigned long a, unsigned long b, unsigned long *c) -{ - fpclass_t a_type, b_type; - EXTENDED op_a, op_b, op_c; - - *c = IEEE_QNaN; - a_type = extend_ieee(a, &op_a, DOUBLE); - b_type = extend_ieee(b, &op_b, DOUBLE); - if ((a_type >= NaN && a_type <= ZERO) || - (b_type >= NaN && b_type <= ZERO)) - { - /* propagate NaNs according to arch. ref. 
handbook: */ - if (b_type == QNaN) - *c = b; - else if (b_type == NaN) - *c = b | (1UL << 51); - else if (a_type == QNaN) - *c = a; - else if (a_type == NaN) - *c = a | (1UL << 51); - - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - - if ((a_type == INFTY && b_type == ZERO) || - (b_type == INFTY && a_type == ZERO)) - { - *c = IEEE_QNaN; /* return canonical QNaN */ - return FPCR_INV; - } - if (a_type == INFTY) - *c = a ^ ((b >> 63) << 63); - else if (b_type == INFTY) - *c = b ^ ((a >> 63) << 63); - else - /* either of a and b are +/-0 */ - *c = ((unsigned long) op_a.s ^ op_b.s) << 63; - return 0; - } - op_c.s = op_a.s ^ op_b.s; - op_c.e = op_a.e + op_b.e - 55; - mul64(op_a.f[0], op_b.f[0], op_c.f); - - return round_t_ieee(f, &op_c, c); -} - - -/* - * Divide a / b = c, where a, b, and c are ieee s-floating numbers. - * "f" contains the rounding mode etc. - */ -unsigned long -ieee_DIVS (int f, unsigned long a, unsigned long b, unsigned long *c) -{ - fpclass_t a_type, b_type; - EXTENDED op_a, op_b, op_c; - - a_type = extend_ieee(a, &op_a, SINGLE); - b_type = extend_ieee(b, &op_b, SINGLE); - if ((a_type >= NaN && a_type <= ZERO) || - (b_type >= NaN && b_type <= ZERO)) - { - unsigned long res; - - /* propagate NaNs according to arch. ref. 
handbook: */ - if (b_type == QNaN) - *c = b; - else if (b_type == NaN) - *c = b | (1UL << 51); - else if (a_type == QNaN) - *c = a; - else if (a_type == NaN) - *c = a | (1UL << 51); - - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - - res = 0; - *c = IEEE_PINF; - if (a_type == INFTY) { - if (b_type == INFTY) { - *c = IEEE_QNaN; - return FPCR_INV; - } - } else if (b_type == ZERO) { - if (a_type == ZERO) { - *c = IEEE_QNaN; - return FPCR_INV; - } - res = FPCR_DZE; - } else - /* a_type == ZERO || b_type == INFTY */ - *c = 0; - *c |= (unsigned long) (op_a.s ^ op_b.s) << 63; - return res; - } - op_c.s = op_a.s ^ op_b.s; - op_c.e = op_a.e - op_b.e; - - op_a.f[1] = op_a.f[0]; - op_a.f[0] = 0; - div128(op_a.f, op_b.f, op_c.f); - if (a_type != ZERO) - /* force a sticky bit because DIVs never hit exact .5: */ - op_c.f[0] |= STICKY_S; - normalize(&op_c); - op_c.e -= 9; /* remove excess exp from original shift */ - return round_s_ieee(f, &op_c, c); -} - - -/* - * Divide a/b = c, where a, b, and c are ieee t-floating numbers. "f" - * contains the rounding mode etc. - */ -unsigned long -ieee_DIVT (int f, unsigned long a, unsigned long b, unsigned long *c) -{ - fpclass_t a_type, b_type; - EXTENDED op_a, op_b, op_c; - - *c = IEEE_QNaN; - a_type = extend_ieee(a, &op_a, DOUBLE); - b_type = extend_ieee(b, &op_b, DOUBLE); - if ((a_type >= NaN && a_type <= ZERO) || - (b_type >= NaN && b_type <= ZERO)) - { - unsigned long res; - - /* propagate NaNs according to arch. ref. 
handbook: */ - if (b_type == QNaN) - *c = b; - else if (b_type == NaN) - *c = b | (1UL << 51); - else if (a_type == QNaN) - *c = a; - else if (a_type == NaN) - *c = a | (1UL << 51); - - if (a_type == NaN || b_type == NaN) - return FPCR_INV; - if (a_type == QNaN || b_type == QNaN) - return 0; - - res = 0; - *c = IEEE_PINF; - if (a_type == INFTY) { - if (b_type == INFTY) { - *c = IEEE_QNaN; - return FPCR_INV; - } - } else if (b_type == ZERO) { - if (a_type == ZERO) { - *c = IEEE_QNaN; - return FPCR_INV; - } - res = FPCR_DZE; - } else - /* a_type == ZERO || b_type == INFTY */ - *c = 0; - *c |= (unsigned long) (op_a.s ^ op_b.s) << 63; - return res; - } - op_c.s = op_a.s ^ op_b.s; - op_c.e = op_a.e - op_b.e; - - op_a.f[1] = op_a.f[0]; - op_a.f[0] = 0; - div128(op_a.f, op_b.f, op_c.f); - if (a_type != ZERO) - /* force a sticky bit because DIVs never hit exact .5 */ - op_c.f[0] |= STICKY_T; - normalize(&op_c); - op_c.e -= 9; /* remove excess exp from original shift */ - return round_t_ieee(f, &op_c, c); -} - -/* - * Sqrt a = b, where a and b are ieee s-floating numbers. "f" - * contains the rounding mode etc. - */ -unsigned long -ieee_SQRTS (int f, unsigned long a, unsigned long *b) -{ - fpclass_t a_type; - EXTENDED op_a, op_b; - - *b = IEEE_QNaN; - a_type = extend_ieee(a, &op_a, SINGLE); - if (op_a.s == 0) { - /* FIXME -- handle positive denormals. */ - send_sig(SIGFPE, current, 1); - } - return FPCR_INV; -} - -/* - * Sqrt a = b, where a and b are ieee t-floating numbers. "f" - * contains the rounding mode etc. - */ -unsigned long -ieee_SQRTT (int f, unsigned long a, unsigned long *b) -{ - fpclass_t a_type; - EXTENDED op_a, op_b; - - *b = IEEE_QNaN; - a_type = extend_ieee(a, &op_a, DOUBLE); - if (op_a.s == 0) { - /* FIXME -- handle positive denormals. 
*/ - send_sig(SIGFPE, current, 1); - } - return FPCR_INV; -} diff --git a/arch/alpha/math-emu/ieee-math.h b/arch/alpha/math-emu/ieee-math.h deleted file mode 100644 index 076a6d1c8..000000000 --- a/arch/alpha/math-emu/ieee-math.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 1992,1995 by - * Digital Equipment Corporation, Maynard, Massachusetts. - * This file may be redistributed according to the terms of the - * GNU General Public License. - */ -#ifndef __ieee_math_h__ -#define __ieee_math_h__ - -#include - -#define ROUND_SHIFT 6 /* make space for trap-enable bits */ -#define RM(f) (((f) >> ROUND_SHIFT) & 0x3) - -#define ROUND_CHOP (FPCR_DYN_CHOPPED >> FPCR_DYN_SHIFT) -#define ROUND_NINF (FPCR_DYN_MINUS >> FPCR_DYN_SHIFT) -#define ROUND_NEAR (FPCR_DYN_NORMAL >> FPCR_DYN_SHIFT) -#define ROUND_PINF (FPCR_DYN_PLUS >> FPCR_DYN_SHIFT) - -extern unsigned long ieee_CVTST (int rm, unsigned long a, unsigned long *b); -extern unsigned long ieee_CVTTS (int rm, unsigned long a, unsigned long *b); -extern unsigned long ieee_CVTQS (int rm, unsigned long a, unsigned long *b); -extern unsigned long ieee_CVTQT (int rm, long a, unsigned long *b); -extern unsigned long ieee_CVTTQ (int rm, unsigned long a, unsigned long *b); - -extern unsigned long ieee_CMPTEQ (unsigned long a, unsigned long b, - unsigned long *c); -extern unsigned long ieee_CMPTLT (unsigned long a, unsigned long b, - unsigned long *c); -extern unsigned long ieee_CMPTLE (unsigned long a, unsigned long b, - unsigned long *c); -extern unsigned long ieee_CMPTUN (unsigned long a, unsigned long b, - unsigned long *c); - -extern unsigned long ieee_ADDS (int rm, unsigned long a, unsigned long b, - unsigned long *c); -extern unsigned long ieee_ADDT (int rm, unsigned long a, unsigned long b, - unsigned long *c); -extern unsigned long ieee_SUBS (int rm, unsigned long a, unsigned long b, - unsigned long *c); -extern unsigned long ieee_SUBT (int rm, unsigned long a, unsigned long b, - unsigned long *c); -extern unsigned 
long ieee_MULS (int rm, unsigned long a, unsigned long b, - unsigned long *c); -extern unsigned long ieee_MULT (int rm, unsigned long a, unsigned long b, - unsigned long *c); -extern unsigned long ieee_DIVS (int rm, unsigned long a, unsigned long b, - unsigned long *c); -extern unsigned long ieee_DIVT (int rm, unsigned long a, unsigned long b, - unsigned long *c); -extern unsigned long ieee_SQRTS (int rm, unsigned long a, unsigned long *b); -extern unsigned long ieee_SQRTT (int rm, unsigned long a, unsigned long *b); - -#endif /* __ieee_math_h__ */ diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c new file mode 100644 index 000000000..c0ece7bcb --- /dev/null +++ b/arch/alpha/math-emu/math.c @@ -0,0 +1,447 @@ +#include +#include +#include +#include + +#include + +#include "sfp-util.h" +#include +#include +#include + +#define OPC_PAL 0x00 +#define OPC_INTA 0x10 +#define OPC_INTL 0x11 +#define OPC_INTS 0x12 +#define OPC_INTM 0x13 +#define OPC_FLTC 0x14 +#define OPC_FLTV 0x15 +#define OPC_FLTI 0x16 +#define OPC_FLTL 0x17 +#define OPC_MISC 0x18 +#define OPC_JSR 0x1a + +#define FOP_SRC_S 0 +#define FOP_SRC_T 2 +#define FOP_SRC_Q 3 + +#define FOP_FNC_ADDx 0 +#define FOP_FNC_CVTQL 0 +#define FOP_FNC_SUBx 1 +#define FOP_FNC_MULx 2 +#define FOP_FNC_DIVx 3 +#define FOP_FNC_CMPxUN 4 +#define FOP_FNC_CMPxEQ 5 +#define FOP_FNC_CMPxLT 6 +#define FOP_FNC_CMPxLE 7 +#define FOP_FNC_SQRTx 11 +#define FOP_FNC_CVTxS 12 +#define FOP_FNC_CVTxT 14 +#define FOP_FNC_CVTxQ 15 + +#define MISC_TRAPB 0x0000 +#define MISC_EXCB 0x0400 + +extern unsigned long alpha_read_fp_reg (unsigned long reg); +extern void alpha_write_fp_reg (unsigned long reg, unsigned long val); +extern unsigned long alpha_read_fp_reg_s (unsigned long reg); +extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val); + + +#ifdef MODULE + +MODULE_DESCRIPTION("FP Software completion module"); + +extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long); +extern long 
(*alpha_fp_emul) (unsigned long pc); + +static long (*save_emul_imprecise)(struct pt_regs *, unsigned long); +static long (*save_emul) (unsigned long pc); + +long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long); +long do_alpha_fp_emul(unsigned long); + +int init_module(void) +{ + save_emul_imprecise = alpha_fp_emul_imprecise; + save_emul = alpha_fp_emul; + alpha_fp_emul_imprecise = do_alpha_fp_emul_imprecise; + alpha_fp_emul = do_alpha_fp_emul; + return 0; +} + +void cleanup_module(void) +{ + alpha_fp_emul_imprecise = save_emul_imprecise; + alpha_fp_emul = save_emul; +} + +#undef alpha_fp_emul_imprecise +#define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise +#undef alpha_fp_emul +#define alpha_fp_emul do_alpha_fp_emul + +#endif /* MODULE */ + +/* For 128-bit division. */ + +void +udiv128(unsigned long divisor_f0, unsigned long divisor_f1, + unsigned long dividend_f0, unsigned long dividend_f1, + unsigned long *quot, unsigned long *remd) +{ + _FP_FRAC_DECL_2(quo); + _FP_FRAC_DECL_2(rem); + _FP_FRAC_DECL_2(tmp); + unsigned long i, num_bits, bit; + + _FP_FRAC_SET_2(rem, _FP_ZEROFRAC_2); + _FP_FRAC_SET_2(quo, _FP_ZEROFRAC_2); + + if (_FP_FRAC_ZEROP_2(divisor)) + goto out; + + if (_FP_FRAC_GT_2(divisor, dividend)) { + _FP_FRAC_COPY_2(rem, dividend); + goto out; + } + + if (_FP_FRAC_EQ_2(divisor, dividend)) { + __FP_FRAC_SET_2(quo, 0, 1); + goto out; + } + + num_bits = 128; + while (1) { + bit = _FP_FRAC_NEGP_2(dividend); + _FP_FRAC_COPY_2(tmp, rem); + _FP_FRAC_SLL_2(tmp, 1); + _FP_FRAC_LOW_2(tmp) |= bit; + if (! 
_FP_FRAC_GE_2(tmp, divisor)) + break; + _FP_FRAC_COPY_2(rem, tmp); + _FP_FRAC_SLL_2(dividend, 1); + num_bits--; + } + + for (i = 0; i < num_bits; i++) { + bit = _FP_FRAC_NEGP_2(dividend); + _FP_FRAC_SLL_2(rem, 1); + _FP_FRAC_LOW_2(rem) |= bit; + _FP_FRAC_SUB_2(tmp, rem, divisor); + bit = _FP_FRAC_NEGP_2(tmp); + _FP_FRAC_SLL_2(dividend, 1); + _FP_FRAC_SLL_2(quo, 1); + if (!bit) { + _FP_FRAC_LOW_2(quo) |= 1; + _FP_FRAC_COPY_2(rem, tmp); + } + } + +out: + *quot = quo_f1; + *remd = rem_f1; + return; +} + +/* + * Emulate the floating point instruction at address PC. Returns 0 if + * emulation fails. Notice that the kernel does not and cannot use FP + * regs. This is good because it means that instead of + * saving/restoring all fp regs, we simply stick the result of the + * operation into the appropriate register. + */ +long +alpha_fp_emul (unsigned long pc) +{ + FP_DECL_EX; + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); + + unsigned long fa, fb, fc, func, mode, src; + unsigned long fpcw = current->thread.flags; + unsigned long res, va, vb, vc, fpcr; + __u32 insn; + + MOD_INC_USE_COUNT; + + get_user(insn, (__u32*)pc); + fc = (insn >> 0) & 0x1f; /* destination register */ + fb = (insn >> 16) & 0x1f; + fa = (insn >> 21) & 0x1f; + func = (insn >> 5) & 0xf; + src = (insn >> 9) & 0x3; + mode = (insn >> 11) & 0x3; + + fpcr = rdfpcr(); + + if (mode == 3) { + /* Dynamic -- get rounding mode from fpcr. 
*/ + mode = (fpcr >> FPCR_DYN_SHIFT) & 3; + } + + switch (src) { + case FOP_SRC_S: + va = alpha_read_fp_reg_s(fa); + vb = alpha_read_fp_reg_s(fb); + + FP_UNPACK_SP(SA, &va); + FP_UNPACK_SP(SB, &vb); + + switch (func) { + case FOP_FNC_SUBx: + FP_SUB_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_ADDx: + FP_ADD_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_MULx: + FP_MUL_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_DIVx: + FP_DIV_S(SR, SA, SB); + goto pack_s; + + case FOP_FNC_SQRTx: + FP_SQRT_S(SR, SB); + goto pack_s; + } + goto bad_insn; + + case FOP_SRC_T: + va = alpha_read_fp_reg(fa); + vb = alpha_read_fp_reg(fb); + + if ((func & ~3) == FOP_FNC_CMPxUN) { + FP_UNPACK_RAW_DP(DA, &va); + FP_UNPACK_RAW_DP(DB, &vb); + if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) { + FP_SET_EXCEPTION(FP_EX_DENORM); + if (FP_DENORM_ZERO) + _FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1); + } + if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) { + FP_SET_EXCEPTION(FP_EX_DENORM); + if (FP_DENORM_ZERO) + _FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1); + } + FP_CMP_D(res, DA, DB, 3); + vc = 0x4000000000000000; + /* CMPTEQ, CMPTUN don't trap on QNaN, while CMPTLT and CMPTLE do */ + if (res == 3 && ((func & 3) >= 2 || FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB))) + FP_SET_EXCEPTION(FP_EX_INVALID); + switch (func) { + case FOP_FNC_CMPxUN: if (res != 3) vc = 0; break; + case FOP_FNC_CMPxEQ: if (res) vc = 0; break; + case FOP_FNC_CMPxLT: if (res != -1) vc = 0; break; + case FOP_FNC_CMPxLE: if ((long)res > 0) vc = 0; break; + } + goto done_d; + } + + FP_UNPACK_DP(DA, &va); + FP_UNPACK_DP(DB, &vb); + + switch (func) { + case FOP_FNC_SUBx: + FP_SUB_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_ADDx: + FP_ADD_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_MULx: + FP_MUL_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_DIVx: + FP_DIV_D(DR, DA, DB); + goto pack_d; + + case FOP_FNC_SQRTx: + FP_SQRT_D(DR, DB); + goto pack_d; + + case FOP_FNC_CVTxS: + /* It is irritating that DEC encoded CVTST with + SRC == T_floating. 
It is also interesting that + the bit used to tell the two apart is /U... */ + if (insn & 0x2000) { + FP_CONV(S,D,1,1,SR,DB); + goto pack_s; + } else { + /* CVTST need do nothing else but copy the + bits and repack. */ + DR_c = DB_c; + DR_s = DB_s; + DR_e = DB_e; + DR_f = DB_f; + goto pack_d; + } + + case FOP_FNC_CVTxQ: + if (DB_c == FP_CLS_NAN && (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) + vc = 0; /* AAHB Table B-2 says QNaN should not trigger INV */ + else + FP_TO_INT_ROUND_D(vc, DB, 64, 2); + goto done_d; + } + goto bad_insn; + + case FOP_SRC_Q: + vb = alpha_read_fp_reg(fb); + + switch (func) { + case FOP_FNC_CVTQL: + /* Notice: We can get here only due to an integer + overflow. Such overflows are reported as invalid + ops. We return the result the hw would have + computed. */ + vc = ((vb & 0xc0000000) << 32 | /* sign and msb */ + (vb & 0x3fffffff) << 29); /* rest of the int */ + FP_SET_EXCEPTION (FP_EX_INVALID); + goto done_d; + + case FOP_FNC_CVTxS: + FP_FROM_INT_S(SR, ((long)vb), 64, long); + goto pack_s; + + case FOP_FNC_CVTxT: + FP_FROM_INT_D(DR, ((long)vb), 64, long); + goto pack_d; + } + goto bad_insn; + } + goto bad_insn; + +pack_s: + FP_PACK_SP(&vc, SR); + alpha_write_fp_reg_s(fc, vc); + goto done; + +pack_d: + FP_PACK_DP(&vc, DR); +done_d: + alpha_write_fp_reg(fc, vc); + goto done; + + /* + * Take the appropriate action for each possible + * floating-point result: + * + * - Set the appropriate bits in the FPCR + * - If the specified exception is enabled in the FPCR, + * return. The caller (entArith) will dispatch + * the appropriate signal to the translated program. + * + * In addition, properly track the exception state in software + * as described in the Alpha Architecture Handbook section 4.7.7.3. + */ +done: + if (_fex) { + /* Record exceptions in software control word. 
*/ + current->thread.flags + = fpcw |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); + + /* Update hardware control register */ + fpcr &= (~FPCR_MASK | FPCR_DYN_MASK); + fpcr |= ieee_swcr_to_fpcr(fpcw); + wrfpcr(fpcr); + + /* Do we generate a signal? */ + if (_fex & fpcw & IEEE_TRAP_ENABLE_MASK) { + MOD_DEC_USE_COUNT; + return 0; + } + } + + /* We used to write the destination register here, but DEC FORTRAN + requires that the result *always* be written... so we do the write + immediately after the operations above. */ + + MOD_DEC_USE_COUNT; + return 1; + +bad_insn: + printk(KERN_ERR "alpha_fp_emul: Invalid FP insn %#x at %#lx\n", + insn, pc); + MOD_DEC_USE_COUNT; + return 0; +} + +long +alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) +{ + unsigned long trigger_pc = regs->pc - 4; + unsigned long insn, opcode, rc; + + MOD_INC_USE_COUNT; + + /* + * Turn off the bits corresponding to registers that are the + * target of instructions that set bits in the exception + * summary register. We have some slack doing this because a + * register that is the target of a trapping instruction can + * be written at most once in the trap shadow. + * + * Branches, jumps, TRAPBs, EXCBs and calls to PALcode all + * bound the trap shadow, so we need not look any further than + * up to the first occurrence of such an instruction. + */ + while (write_mask) { + get_user(insn, (__u32*)(trigger_pc)); + opcode = insn >> 26; + rc = insn & 0x1f; + + switch (opcode) { + case OPC_PAL: + case OPC_JSR: + case 0x30 ... 
0x3f: /* branches */ + MOD_DEC_USE_COUNT; + return 0; + + case OPC_MISC: + switch (insn & 0xffff) { + case MISC_TRAPB: + case MISC_EXCB: + MOD_DEC_USE_COUNT; + return 0; + + default: + break; + } + break; + + case OPC_INTA: + case OPC_INTL: + case OPC_INTS: + case OPC_INTM: + write_mask &= ~(1UL << rc); + break; + + case OPC_FLTC: + case OPC_FLTV: + case OPC_FLTI: + case OPC_FLTL: + write_mask &= ~(1UL << (rc + 32)); + break; + } + if (!write_mask) { + if (alpha_fp_emul(trigger_pc)) { + /* re-execute insns in trap-shadow: */ + regs->pc = trigger_pc + 4; + MOD_DEC_USE_COUNT; + return 1; + } + break; + } + trigger_pc -= 4; + } + MOD_DEC_USE_COUNT; + return 0; +} diff --git a/arch/alpha/math-emu/sfp-util.h b/arch/alpha/math-emu/sfp-util.h new file mode 100644 index 000000000..7a6a8cf45 --- /dev/null +++ b/arch/alpha/math-emu/sfp-util.h @@ -0,0 +1,40 @@ +#include +#include +#include +#include +#include + +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + ((sl) = (al) + (bl), (sh) = (ah) + (bh) + ((sl) < (al))) + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + ((sl) = (al) - (bl), (sh) = (ah) - (bh) - ((al) < (bl))) + +#define umul_ppmm(wh, wl, u, v) \ + __asm__ ("mulq %2,%3,%1; umulh %2,%3,%0" \ + : "=r" ((UDItype)(wh)), \ + "=&r" ((UDItype)(wl)) \ + : "r" ((UDItype)(u)), \ + "r" ((UDItype)(v))) + +extern void udiv128(unsigned long, unsigned long, + unsigned long, unsigned long, + unsigned long *, + unsigned long *); + +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + unsigned long xr, xi; \ + udiv128((n0), (n1), 0, (d), &xr, &xi); \ + (q) = xr; \ + (r) = xi; \ + } while (0) + +#define UDIV_NEEDS_NORMALIZATION 1 + +#define abort() goto bad_insn + +#ifndef __LITTLE_ENDIAN +#define __LITTLE_ENDIAN -1 +#endif +#define __BYTE_ORDER __LITTLE_ENDIAN -- cgit v1.2.3