.file "reg_round.S" /*---------------------------------------------------------------------------+ | reg_round.S | | | | Rounding/truncation/etc for FPU basic arithmetic functions. | | | | Copyright (C) 1993,1995 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | | | This code has four possible entry points. | | The following must be entered by a jmp instruction: | | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. | | | | The _round_reg entry point is intended to be used by C code. | | From C, call as: | | void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) | | | | For correct "up" and "down" rounding, the argument must have the correct | | sign. | | | +---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------+ | Four entry points. | | | | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: | | %eax:%ebx 64 bit significand | | %edx 32 bit extension of the significand | | %edi pointer to an FPU_REG for the result to be stored | | stack calling function must have set up a C stack frame and | | pushed %esi, %edi, and %ebx | | | | Needed just for the fpu_reg_round_sqrt entry point: | | %cx A control word in the same format as the FPU control word. | | Otherwise, PARAM4 must give such a value. | | | | | | The significand and its extension are assumed to be exact in the | | following sense: | | If the significand by itself is the exact result then the significand | | extension (%edx) must contain 0, otherwise the significand extension | | must be non-zero. | | If the significand extension is non-zero then the significand is | | smaller than the magnitude of the correct exact result by an amount | | greater than zero and less than one ls bit of the significand. 
| | The significand extension is only required to have three possible | | non-zero values: | | less than 0x80000000 <=> the significand is less than 1/2 an ls | | bit smaller than the magnitude of the | | true exact result. | | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | | smaller than the magnitude of the true | | exact result. | | greater than 0x80000000 <=> the significand is more than 1/2 an ls | | bit smaller than the magnitude of the | | true exact result. | | | +---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------+ | The code in this module has become quite complex, but it should handle | | all of the FPU flags which are set at this stage of the basic arithmetic | | computations. | | There are a few rare cases where the results are not set identically to | | a real FPU. These require a bit more thought because at this stage the | | results of the code here appear to be more consistent... | | This may be changed in a future version. 
| +---------------------------------------------------------------------------*/

#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/* Flags for FPU_bits_lost */
#define LOST_DOWN               $1
#define LOST_UP                 $2

/* Flags for FPU_denormal */
#define DENORMAL                $1
#define UNMASKED_UNDERFLOW      $2


#ifndef NON_REENTRANT_FPU
/*      Make the code re-entrant by putting
        local storage on the stack: */
#define FPU_bits_lost   (%esp)
#define FPU_denormal    1(%esp)

#else
/*      Not re-entrant, so we can gain speed by putting
        local storage in a static area: */
.data
        .align 2,0
FPU_bits_lost:
        .byte   0
FPU_denormal:
        .byte   0
#endif /* NON_REENTRANT_FPU */


/*
 * Working state used throughout this routine (see also the header above):
 *
 *   %eax:%ebx      64 bit significand (ms word in %eax, ls word in %ebx)
 *   %edx           32 bit extension of the significand
 *   %edi           pointer to the FPU_REG which receives the result
 *   %ecx           control word on entry; re-used as scratch afterwards
 *   %esi           copy of the control word, taken at xDenorm_done
 *   FPU_bits_lost  0, LOST_DOWN or LOST_UP: how the rounding changed
 *                  the significand
 *   FPU_denormal   0, DENORMAL or UNMASKED_UNDERFLOW: set when the exponent
 *                  on entry is <= EXP_UNDER
 *
 * In the re-entrant build the two byte flags live in the 4 byte stack slot
 * created by the "pushl %ebx" at fpu_reg_round_sqrt, so they may only be
 * accessed while %esp points at that slot, i.e. not between a later push
 * and its matching pop.
 *
 * On the normal store path %eax is cleared ("No errors detected") before
 * the return.  On the overflow path %eax is left as arith_overflow set it,
 * and the PARANOID failure paths load 1.
 */

.text
        .globl fpu_reg_round
        .globl fpu_reg_round_sqrt
        .globl fpu_Arith_exit

/* Entry point when called from C */
ENTRY(round_reg)
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %esi
        pushl   %edi
        pushl   %ebx

        movl    PARAM1,%edi             /* FPU_REG to be rounded in place */
        movl    SIGH(%edi),%eax
        movl    SIGL(%edi),%ebx
        movl    PARAM2,%edx             /* significand extension */
        movl    PARAM3,%ecx             /* control word */
        jmp     fpu_reg_round_sqrt

fpu_reg_round:                  /* Normal entry point */
        movl    PARAM4,%ecx

fpu_reg_round_sqrt:             /* Entry point from wm_sqrt.S */

#ifndef NON_REENTRANT_FPU
        pushl   %ebx            /* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*      orl     %eax,%eax */
/*      jns     L_entry_bugged */
#endif /* PARANOID */

        cmpl    EXP_UNDER,EXP(%edi)
        jle     xMake_denorm                    /* The number is a de-normal */

        movb    $0,FPU_denormal                 /* 0 -> not a de-normal */

xDenorm_done:
        movb    $0,FPU_bits_lost                /* No bits yet lost in rounding */

        /* Dispatch on the precision control field of the control word. */
        movl    %ecx,%esi
        andl    CW_PC,%ecx
        cmpl    PR_64_BITS,%ecx
        je      LRound_To_64

        cmpl    PR_53_BITS,%ecx
        je      LRound_To_53

        cmpl    PR_24_BITS,%ecx
        je      LRound_To_24

#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
        cmpl    PR_RESERVED_BITS,%ecx
        je      LRound_To_64
#ifdef PARANOID
        jmp     L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
        jmp     L_bugged_denorm /* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */


/* Round etc to 24 bit precision */
LRound_To_24:
        /* Dispatch on the rounding control field of the control word. */
        movl    %esi,%ecx
        andl    CW_RC,%ecx
        cmpl    RC_RND,%ecx
        je      LRound_nearest_24

        cmpl    RC_CHOP,%ecx
        je      LCheck_truncate_24

        cmpl    RC_UP,%ecx              /* Towards +infinity */
        je      LUp_24

        cmpl    RC_DOWN,%ecx            /* Towards -infinity */
        je      LDown_24

#ifdef PARANOID
        jmp     L_bugged_round24
#endif /* PARANOID */

LUp_24:
        cmpb    SIGN_POS,SIGN(%edi)
        jne     LCheck_truncate_24      /* If negative then  up==truncate */

        jmp     LCheck_24_round_up

LDown_24:
        cmpb    SIGN_POS,SIGN(%edi)
        je      LCheck_truncate_24      /* If positive then  down==truncate */

LCheck_24_round_up:
        /* Round away from zero if anything below the 24th bit is set. */
        movl    %eax,%ecx
        andl    $0x000000ff,%ecx
        orl     %ebx,%ecx
        orl     %edx,%ecx
        jnz     LDo_24_round_up
        jmp     LRe_normalise

LRound_nearest_24:
        /* Do rounding of the 24th bit if needed (nearest or even) */
        movl    %eax,%ecx
        andl    $0x000000ff,%ecx
        cmpl    $0x00000080,%ecx
        jc      LCheck_truncate_24      /* less than half, no increment needed */

        jne     LGreater_Half_24        /* greater than half, increment needed */

        /* Possibly half, we need to check the ls bits */
        orl     %ebx,%ebx
        jnz     LGreater_Half_24        /* greater than half, increment needed */

        orl     %edx,%edx
        jnz     LGreater_Half_24        /* greater than half, increment needed */

        /* Exactly half, increment only if 24th bit is 1 (round to even) */
        testl   $0x00000100,%eax
        jz      LDo_truncate_24

LGreater_Half_24:                       /* Rounding: increment at the 24th bit */
LDo_24_round_up:
        andl    $0xffffff00,%eax        /* Truncate to 24 bits */
        xorl    %ebx,%ebx
        movb    LOST_UP,FPU_bits_lost
        addl    $0x00000100,%eax        /* carry out is tested at LCheck_Round_Overflow */
        jmp     LCheck_Round_Overflow

LCheck_truncate_24:
        movl    %eax,%ecx
        andl    $0x000000ff,%ecx
        orl     %ebx,%ecx
        orl     %edx,%ecx
        jz      LRe_normalise           /* No truncation needed */

LDo_truncate_24:
        andl    $0xffffff00,%eax        /* Truncate to 24 bits */
        xorl    %ebx,%ebx
        movb    LOST_DOWN,FPU_bits_lost
        jmp     LRe_normalise


/* Round etc to 53 bit precision */
LRound_To_53:
        movl    %esi,%ecx
        andl    CW_RC,%ecx
        cmpl    RC_RND,%ecx
        je      LRound_nearest_53

        cmpl    RC_CHOP,%ecx
        je      LCheck_truncate_53

        cmpl    RC_UP,%ecx              /* Towards +infinity */
        je      LUp_53

        cmpl    RC_DOWN,%ecx            /* Towards -infinity */
        je      LDown_53

#ifdef PARANOID
        jmp     L_bugged_round53
#endif /* PARANOID */

LUp_53:
        cmpb    SIGN_POS,SIGN(%edi)
        jne     LCheck_truncate_53      /* If negative then  up==truncate */

        jmp     LCheck_53_round_up

LDown_53:
        cmpb    SIGN_POS,SIGN(%edi)
        je      LCheck_truncate_53      /* If positive then  down==truncate */

LCheck_53_round_up:
        /* Round away from zero if anything below the 53rd bit is set. */
        movl    %ebx,%ecx
        andl    $0x000007ff,%ecx
        orl     %edx,%ecx
        jnz     LDo_53_round_up
        jmp     LRe_normalise

LRound_nearest_53:
        /* Do rounding of the 53rd bit if needed (nearest or even) */
        movl    %ebx,%ecx
        andl    $0x000007ff,%ecx
        cmpl    $0x00000400,%ecx
        jc      LCheck_truncate_53      /* less than half, no increment needed */

        jnz     LGreater_Half_53        /* greater than half, increment needed */

        /* Possibly half, we need to check the ls bits */
        orl     %edx,%edx
        jnz     LGreater_Half_53        /* greater than half, increment needed */

        /* Exactly half, increment only if 53rd bit is 1 (round to even) */
        testl   $0x00000800,%ebx
        jz      LTruncate_53

LGreater_Half_53:                       /* Rounding: increment at the 53rd bit */
LDo_53_round_up:
        movb    LOST_UP,FPU_bits_lost
        andl    $0xfffff800,%ebx        /* Truncate to 53 bits */
        addl    $0x00000800,%ebx
        adcl    $0,%eax                 /* carry out is tested at LCheck_Round_Overflow */
        jmp     LCheck_Round_Overflow

LCheck_truncate_53:
        movl    %ebx,%ecx
        andl    $0x000007ff,%ecx
        orl     %edx,%ecx
        jz      LRe_normalise

LTruncate_53:
        movb    LOST_DOWN,FPU_bits_lost
        andl    $0xfffff800,%ebx        /* Truncate to 53 bits */
        jmp     LRe_normalise


/* Round etc to 64 bit precision */
LRound_To_64:
        movl    %esi,%ecx
        andl    CW_RC,%ecx
        cmpl    RC_RND,%ecx
        je      LRound_nearest_64

        cmpl    RC_CHOP,%ecx
        je      LCheck_truncate_64

        cmpl    RC_UP,%ecx              /* Towards +infinity */
        je      LUp_64

        cmpl    RC_DOWN,%ecx            /* Towards -infinity */
        je      LDown_64

#ifdef PARANOID
        jmp     L_bugged_round64
#endif /* PARANOID */

LUp_64:
        cmpb    SIGN_POS,SIGN(%edi)
        jne     LCheck_truncate_64      /* If negative then  up==truncate */

        orl     %edx,%edx
        jnz     LDo_64_round_up
        jmp     LRe_normalise

LDown_64:
        cmpb    SIGN_POS,SIGN(%edi)
        je      LCheck_truncate_64      /* If positive then  down==truncate */

        orl     %edx,%edx
        jnz     LDo_64_round_up
        jmp     LRe_normalise

LRound_nearest_64:
        cmpl    $0x80000000,%edx
        jc      LCheck_truncate_64

        jne     LDo_64_round_up

        /* Now test for round-to-even */
        /* The ls bit of the significand is bit 0 of %ebx; a byte-sized
           test has to name the byte register %bl. */
        testb   $1,%bl
        jz      LCheck_truncate_64

LDo_64_round_up:
        movb    LOST_UP,FPU_bits_lost
        addl    $1,%ebx
        adcl    $0,%eax

LCheck_Round_Overflow:
        /* Reached with CF holding the carry out of the rounding increment. */
        jnc     LRe_normalise

        /* Overflow, adjust the result (significand to 1.0) */
        rcrl    $1,%eax
        rcrl    $1,%ebx
        incl    EXP(%edi)
        jmp     LRe_normalise

LCheck_truncate_64:
        orl     %edx,%edx
        jz      LRe_normalise

LTruncate_64:
        movb    LOST_DOWN,FPU_bits_lost

LRe_normalise:
        testb   $0xff,FPU_denormal
        jnz     xNormalise_result

xL_Normalised:
        cmpb    LOST_UP,FPU_bits_lost
        je      xL_precision_lost_up

        cmpb    LOST_DOWN,FPU_bits_lost
        je      xL_precision_lost_down

xL_no_precision_loss:
        /* store the result */
        movb    TW_Valid,TAG(%edi)

xL_Store_significand:
        movl    %eax,SIGH(%edi)
        movl    %ebx,SIGL(%edi)

        xorl    %eax,%eax       /* No errors detected. */

        cmpl    EXP_OVER,EXP(%edi)
        jge     L_overflow

fpu_reg_round_exit:
#ifndef NON_REENTRANT_FPU
        popl    %ebx            /* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

fpu_Arith_exit:
        /* Undo the frame described in the header: %ebx, %edi, %esi, %ebp. */
        popl    %ebx
        popl    %edi
        popl    %esi
        leave
        ret


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 */
xL_precision_lost_up:
        push    %eax            /* keep the ms significand word across the call */
        call    SYMBOL_NAME(set_precision_flag_up)
        popl    %eax
        jmp     xL_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 */
xL_precision_lost_down:
        push    %eax            /* keep the ms significand word across the call */
        call    SYMBOL_NAME(set_precision_flag_down)
        popl    %eax
        jmp     xL_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 */
xMake_denorm:
        /* The action to be taken depends upon whether the underflow
           exception is masked */
        testb   CW_Underflow,%cl                /* Underflow mask. */
        jz      xUnmasked_underflow             /* Do not make a denormal. */

        movb    DENORMAL,FPU_denormal

        pushl   %ecx            /* Save */
        movl    EXP_UNDER+1,%ecx
        subl    EXP(%edi),%ecx  /* %ecx = number of bits to shift right */

        cmpl    $64,%ecx        /* shrd only works for 0..31 bits */
        jnc     xDenorm_shift_more_than_63

        cmpl    $32,%ecx        /* shrd only works for 0..31 bits */
        jnc     xDenorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *   is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
        addl    %ecx,EXP(%edi)
        orl     %edx,%edx       /* extension */
        setne   %ch             /* Save whether %edx is non-zero */
        xorl    %edx,%edx
        shrd    %cl,%ebx,%edx
        shrd    %cl,%eax,%ebx
        shr     %cl,%eax
        orb     %ch,%dl         /* keep the old extension as a sticky bit */
        popl    %ecx
        jmp     xDenorm_done

/* Shift by [32..63] bits */
xDenorm_shift_more_than_32:
        addl    %ecx,EXP(%edi)
        subb    $32,%cl
        orl     %edx,%edx
        setne   %ch
        orb     %ch,%bl
        xorl    %edx,%edx
        shrd    %cl,%ebx,%edx
        shrd    %cl,%eax,%ebx
        shr     %cl,%eax
        orl     %edx,%edx               /* test these 32 bits */
        setne   %cl
        orb     %ch,%bl
        orb     %cl,%bl
        movl    %ebx,%edx               /* move everything down one word */
        movl    %eax,%ebx
        xorl    %eax,%eax
        popl    %ecx
        jmp     xDenorm_done

/* Shift by [64..) bits */
xDenorm_shift_more_than_63:
        cmpl    $64,%ecx
        jne     xDenorm_shift_more_than_64

/* Exactly 64 bit shift */
        addl    %ecx,EXP(%edi)
        xorl    %ecx,%ecx
        orl     %edx,%edx
        setne   %cl
        orl     %ebx,%ebx
        setne   %ch
        orb     %ch,%cl
        orb     %cl,%al                 /* fold the lost words in as a sticky bit */
        movl    %eax,%edx
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        popl    %ecx
        jmp     xDenorm_done

xDenorm_shift_more_than_64:
        movl    EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
        movl    $1,%edx
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        popl    %ecx
        jmp     xDenorm_done


xUnmasked_underflow:
        movb    UNMASKED_UNDERFLOW,FPU_denormal
        jmp     xDenorm_done


/* Undo the de-normalisation. */
xNormalise_result:
        cmpb    UNMASKED_UNDERFLOW,FPU_denormal
        je      xSignal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
        /* But check it... just in case. */
        cmpl    EXP_UNDER+1,EXP(%edi)
        jne     L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
        /*
         * This implements a special feature of 80486 behaviour.
         * Underflow will be signalled even if the number is
         * not a denormal after rounding.
         * This difference occurs only for masked underflow, and not
         * in the unmasked case.
         * Actual 80486 behaviour differs from this in some circumstances.
         */
        orl     %eax,%eax               /* ms bits */
        js      LNormalise_shift_done   /* Will be masked underflow */
#endif /* PECULIAR_486 */

        orl     %eax,%eax               /* ms bits */
        js      xL_Normalised           /* No longer a denormal */

        jnz     LNormalise_shift_up_to_31       /* Shift left 0 - 31 bits */

        orl     %ebx,%ebx
        jz      L_underflow_to_zero     /* The contents are zero */

/* Shift left 32 - 63 bits */
        movl    %ebx,%eax
        xorl    %ebx,%ebx
        subl    $32,EXP(%edi)

LNormalise_shift_up_to_31:
        bsrl    %eax,%ecx       /* get the required shift in %ecx */
        subl    $31,%ecx
        negl    %ecx            /* %ecx = 31 - (index of ms set bit) */
        shld    %cl,%ebx,%eax
        shl     %cl,%ebx
        subl    %ecx,EXP(%edi)

LNormalise_shift_done:
        testb   $0xff,FPU_bits_lost     /* bits lost == underflow */
        jz      xL_Normalised

        /* There must be a masked underflow */
        push    %eax                    /* preserve the ms significand word */
        pushl   EX_Underflow
        call    SYMBOL_NAME(FPU_exception)
        popl    %eax                    /* discard the argument */
        popl    %eax                    /* restore the ms significand word */
        jmp     xL_Normalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 */
L_underflow_to_zero:
        push    %eax
        call    SYMBOL_NAME(set_precision_flag_down)
        popl    %eax

        push    %eax
        pushl   EX_Underflow
        call    SYMBOL_NAME(FPU_exception)
        popl    %eax                    /* discard the argument */
        popl    %eax

        /* Reduce the exponent to EXP_UNDER */
        movl    EXP_UNDER,EXP(%edi)
        movb    TW_Zero,TAG(%edi)
        jmp     xL_Store_significand


/* The operations resulted in a number too large to represent. */
L_overflow:
        push    %edi
        call    SYMBOL_NAME(arith_overflow)
        pop     %edi
        jmp     fpu_reg_round_exit


xSignal_underflow:
        /* The number may have been changed to a non-denormal */
        /* by the rounding operations. */
        cmpl    EXP_UNDER,EXP(%edi)
        jle     xDo_unmasked_underflow

        jmp     xL_Normalised

xDo_unmasked_underflow:
        /* Increase the exponent by the magic number */
        addl    $(3*(1<<13)),EXP(%edi)
        push    %eax                    /* preserve the ms significand word */
        pushl   EX_Underflow
        call    EXCEPTION
        popl    %eax                    /* discard the argument */
        popl    %eax                    /* restore the ms significand word */
        jmp     xL_Normalised


#ifdef PARANOID
/*
 * Internal consistency failures.  Each handler passes EX_INTERNAL or'ed
 * with its own code to EXCEPTION and then leaves through L_exception_exit.
 */
#ifdef PECULIAR_486
L_bugged_denorm_486:
        pushl   EX_INTERNAL|0x236
        call    EXCEPTION
        popl    %ebx
        jmp     L_exception_exit
#else
L_bugged_denorm:
        pushl   EX_INTERNAL|0x230
        call    EXCEPTION
        popl    %ebx
        jmp     L_exception_exit
#endif /* PECULIAR_486 */

L_bugged_round24:
        pushl   EX_INTERNAL|0x231
        call    EXCEPTION
        popl    %ebx
        jmp     L_exception_exit

L_bugged_round53:
        pushl   EX_INTERNAL|0x232
        call    EXCEPTION
        popl    %ebx
        jmp     L_exception_exit

L_bugged_round64:
        pushl   EX_INTERNAL|0x233
        call    EXCEPTION
        popl    %ebx
        jmp     L_exception_exit

L_norm_bugged:
        pushl   EX_INTERNAL|0x234
        call    EXCEPTION
        popl    %ebx
        jmp     L_exception_exit

L_entry_bugged:
        pushl   EX_INTERNAL|0x235
        call    EXCEPTION
        popl    %ebx
L_exception_exit:
        mov     $1,%eax
        jmp     fpu_reg_round_exit
#endif /* PARANOID */