123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470 |
- .file "wm_sqrt.S"
- /*---------------------------------------------------------------------------+
- | wm_sqrt.S |
- | |
- | Fixed point arithmetic square root evaluation. |
- | |
- | Copyright (C) 1992,1993,1995,1997 |
- | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
- | Australia. E-mail billm@suburbia.net |
- | |
- | Call from C as: |
- | int wm_sqrt(FPU_REG *n, unsigned int control_word) |
- | |
- +---------------------------------------------------------------------------*/
- /*---------------------------------------------------------------------------+
- | wm_sqrt(FPU_REG *n, unsigned int control_word) |
- | returns the square root of n in n. |
- | |
- | Use Newton's method to compute the square root of a number, which must |
- | be in the range [1.0 .. 4.0), to 64 bits accuracy. |
- | Does not check the sign or tag of the argument. |
- | Sets the exponent, but not the sign or tag of the result. |
- | |
- | The guess is kept in %esi:%edi |
- +---------------------------------------------------------------------------*/
- #include "exception.h"
- #include "fpu_emu.h"
- #ifndef NON_REENTRANT_FPU
- /* Local storage on the stack: */
- #define FPU_accum_3 -4(%ebp) /* ms word */
- #define FPU_accum_2 -8(%ebp)
- #define FPU_accum_1 -12(%ebp)
- #define FPU_accum_0 -16(%ebp)
- /*
- * The de-normalised argument:
- * sq_2 sq_1 sq_0
- * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
- * ^ binary point here
- */
- #define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
- #define FPU_fsqrt_arg_1 -24(%ebp)
- #define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
- #else
- /* Local storage in a static area: */
- .data
- .align 4,0
- FPU_accum_3:
- .long 0 /* ms word */
- FPU_accum_2:
- .long 0
- FPU_accum_1:
- .long 0
- FPU_accum_0:
- .long 0
- /* The de-normalised argument:
- sq_2 sq_1 sq_0
- b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
- ^ binary point here
- */
- FPU_fsqrt_arg_2:
- .long 0 /* ms word */
- FPU_fsqrt_arg_1:
- .long 0
- FPU_fsqrt_arg_0:
- .long 0 /* ls word, at most the ms bit is set */
- #endif /* NON_REENTRANT_FPU */
- .text
- ENTRY(wm_sqrt)
- pushl %ebp
- movl %esp,%ebp
- #ifndef NON_REENTRANT_FPU
- subl $28,%esp
- #endif /* NON_REENTRANT_FPU */
- pushl %esi
- pushl %edi
- pushl %ebx
- movl PARAM1,%esi
- movl SIGH(%esi),%eax
- movl SIGL(%esi),%ecx
- xorl %edx,%edx
- /* We use a rough linear estimate for the first guess.. */
- cmpw EXP_BIAS,EXP(%esi)
- jnz sqrt_arg_ge_2
- shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
- rcrl $1,%ecx
- rcrl $1,%edx
- sqrt_arg_ge_2:
- /* From here on, n is never accessed directly again until it is
- replaced by the answer. */
- movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
- movl %ecx,FPU_fsqrt_arg_1
- movl %edx,FPU_fsqrt_arg_0
- /* Make a linear first estimate */
- shrl $1,%eax
- addl $0x40000000,%eax
- movl $0xaaaaaaaa,%ecx
- mull %ecx
- shll %edx /* max result was 7fff... */
- testl $0x80000000,%edx /* but min was 3fff... */
- jnz sqrt_prelim_no_adjust
- movl $0x80000000,%edx /* round up */
- sqrt_prelim_no_adjust:
- movl %edx,%esi /* Our first guess */
- /* We have now computed (approx) (2 + x) / 3, which forms the basis
- for a few iterations of Newton's method */
- movl FPU_fsqrt_arg_2,%ecx /* ms word */
- /*
- * From our initial estimate, three iterations are enough to get us
- * to 30 bits or so. This will then allow two iterations at better
- * precision to complete the process.
- */
- /* Compute (g + n/g)/2 at each iteration (g is the guess). */
- shrl %ecx /* Doing this first will prevent a divide */
- /* overflow later. */
- movl %ecx,%edx /* msw of the arg / 2 */
- divl %esi /* current estimate */
- shrl %esi /* divide by 2 */
- addl %eax,%esi /* the new estimate */
- movl %ecx,%edx
- divl %esi
- shrl %esi
- addl %eax,%esi
- movl %ecx,%edx
- divl %esi
- shrl %esi
- addl %eax,%esi
- /*
- * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
- * we improve it to 60 bits or so.
- *
- * The strategy from now on is to compute new estimates from
- * guess := guess + (n - guess^2) / (2 * guess)
- */
- /* First, find the square of the guess */
- movl %esi,%eax
- mull %esi
- /* guess^2 now in %edx:%eax */
- movl FPU_fsqrt_arg_1,%ecx
- subl %ecx,%eax
- movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
- sbbl %ecx,%edx
- jnc sqrt_stage_2_positive
- /* Subtraction gives a negative result,
- negate the result before division. */
- notl %edx
- notl %eax
- addl $1,%eax
- adcl $0,%edx
- divl %esi
- movl %eax,%ecx
- movl %edx,%eax
- divl %esi
- jmp sqrt_stage_2_finish
- sqrt_stage_2_positive:
- divl %esi
- movl %eax,%ecx
- movl %edx,%eax
- divl %esi
- notl %ecx
- notl %eax
- addl $1,%eax
- adcl $0,%ecx
- sqrt_stage_2_finish:
- sarl $1,%ecx /* divide by 2 */
- rcrl $1,%eax
- /* Form the new estimate in %esi:%edi */
- movl %eax,%edi
- addl %ecx,%esi
- jnz sqrt_stage_2_done /* result should be [1..2) */
- #ifdef PARANOID
- /* It should be possible to get here only if the arg is ffff....ffff */
- cmp $0xffffffff,FPU_fsqrt_arg_1
- jnz sqrt_stage_2_error
- #endif /* PARANOID */
- /* The best rounded result. */
- xorl %eax,%eax
- decl %eax
- movl %eax,%edi
- movl %eax,%esi
- movl $0x7fffffff,%eax
- jmp sqrt_round_result
- #ifdef PARANOID
- sqrt_stage_2_error:
- pushl EX_INTERNAL|0x213
- call EXCEPTION
- #endif /* PARANOID */
- sqrt_stage_2_done:
- /* Now the square root has been computed to better than 60 bits. */
- /* Find the square of the guess. */
- movl %edi,%eax /* ls word of guess */
- mull %edi
- movl %edx,FPU_accum_1
- movl %esi,%eax
- mull %esi
- movl %edx,FPU_accum_3
- movl %eax,FPU_accum_2
- movl %edi,%eax
- mull %esi
- addl %eax,FPU_accum_1
- adcl %edx,FPU_accum_2
- adcl $0,FPU_accum_3
- /* movl %esi,%eax */
- /* mull %edi */
- addl %eax,FPU_accum_1
- adcl %edx,FPU_accum_2
- adcl $0,FPU_accum_3
- /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
- movl FPU_fsqrt_arg_0,%eax /* get normalized n */
- subl %eax,FPU_accum_1
- movl FPU_fsqrt_arg_1,%eax
- sbbl %eax,FPU_accum_2
- movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
- sbbl %eax,FPU_accum_3
- jnc sqrt_stage_3_positive
- /* Subtraction gives a negative result,
- negate the result before division */
- notl FPU_accum_1
- notl FPU_accum_2
- notl FPU_accum_3
- addl $1,FPU_accum_1
- adcl $0,FPU_accum_2
- #ifdef PARANOID
- adcl $0,FPU_accum_3 /* This must be zero */
- jz sqrt_stage_3_no_error
- sqrt_stage_3_error:
- pushl EX_INTERNAL|0x207
- call EXCEPTION
- sqrt_stage_3_no_error:
- #endif /* PARANOID */
- movl FPU_accum_2,%edx
- movl FPU_accum_1,%eax
- divl %esi
- movl %eax,%ecx
- movl %edx,%eax
- divl %esi
- sarl $1,%ecx /* divide by 2 */
- rcrl $1,%eax
- /* prepare to round the result */
- addl %ecx,%edi
- adcl $0,%esi
- jmp sqrt_stage_3_finished
- sqrt_stage_3_positive:
- movl FPU_accum_2,%edx
- movl FPU_accum_1,%eax
- divl %esi
- movl %eax,%ecx
- movl %edx,%eax
- divl %esi
- sarl $1,%ecx /* divide by 2 */
- rcrl $1,%eax
- /* prepare to round the result */
- notl %eax /* Negate the correction term */
- notl %ecx
- addl $1,%eax
- adcl $0,%ecx /* carry here ==> correction == 0 */
- adcl $0xffffffff,%esi
- addl %ecx,%edi
- adcl $0,%esi
- sqrt_stage_3_finished:
- /*
- * The result in %esi:%edi:%esi should be good to about 90 bits here,
- * and the rounding information here does not have sufficient accuracy
- * in a few rare cases.
- */
- cmpl $0xffffffe0,%eax
- ja sqrt_near_exact_x
- cmpl $0x00000020,%eax
- jb sqrt_near_exact
- cmpl $0x7fffffe0,%eax
- jb sqrt_round_result
- cmpl $0x80000020,%eax
- jb sqrt_get_more_precision
- sqrt_round_result:
- /* Set up for rounding operations */
- movl %eax,%edx
- movl %esi,%eax
- movl %edi,%ebx
- movl PARAM1,%edi
- movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
- jmp fpu_reg_round
- sqrt_near_exact_x:
- /* First, the estimate must be rounded up. */
- addl $1,%edi
- adcl $0,%esi
- sqrt_near_exact:
- /*
- * This is an easy case because x^1/2 is monotonic.
- * We need just find the square of our estimate, compare it
- * with the argument, and deduce whether our estimate is
- * above, below, or exact. We use the fact that the estimate
- * is known to be accurate to about 90 bits.
- */
- movl %edi,%eax /* ls word of guess */
- mull %edi
- movl %edx,%ebx /* 2nd ls word of square */
- movl %eax,%ecx /* ls word of square */
- movl %edi,%eax
- mull %esi
- addl %eax,%ebx
- addl %eax,%ebx
- #ifdef PARANOID
- cmp $0xffffffb0,%ebx
- jb sqrt_near_exact_ok
- cmp $0x00000050,%ebx
- ja sqrt_near_exact_ok
- pushl EX_INTERNAL|0x214
- call EXCEPTION
- sqrt_near_exact_ok:
- #endif /* PARANOID */
- or %ebx,%ebx
- js sqrt_near_exact_small
- jnz sqrt_near_exact_large
- or %ebx,%edx
- jnz sqrt_near_exact_large
- /* Our estimate is exactly the right answer */
- xorl %eax,%eax
- jmp sqrt_round_result
- sqrt_near_exact_small:
- /* Our estimate is too small */
- movl $0x000000ff,%eax
- jmp sqrt_round_result
-
- sqrt_near_exact_large:
- /* Our estimate is too large, we need to decrement it */
- subl $1,%edi
- sbbl $0,%esi
- movl $0xffffff00,%eax
- jmp sqrt_round_result
- sqrt_get_more_precision:
- /* This case is almost the same as the above, except we start
- with an extra bit of precision in the estimate. */
- stc /* The extra bit. */
- rcll $1,%edi /* Shift the estimate left one bit */
- rcll $1,%esi
- movl %edi,%eax /* ls word of guess */
- mull %edi
- movl %edx,%ebx /* 2nd ls word of square */
- movl %eax,%ecx /* ls word of square */
- movl %edi,%eax
- mull %esi
- addl %eax,%ebx
- addl %eax,%ebx
- /* Put our estimate back to its original value */
- stc /* The ms bit. */
- rcrl $1,%esi /* Shift the estimate left one bit */
- rcrl $1,%edi
- #ifdef PARANOID
- cmp $0xffffff60,%ebx
- jb sqrt_more_prec_ok
- cmp $0x000000a0,%ebx
- ja sqrt_more_prec_ok
- pushl EX_INTERNAL|0x215
- call EXCEPTION
- sqrt_more_prec_ok:
- #endif /* PARANOID */
- or %ebx,%ebx
- js sqrt_more_prec_small
- jnz sqrt_more_prec_large
- or %ebx,%ecx
- jnz sqrt_more_prec_large
- /* Our estimate is exactly the right answer */
- movl $0x80000000,%eax
- jmp sqrt_round_result
- sqrt_more_prec_small:
- /* Our estimate is too small */
- movl $0x800000ff,%eax
- jmp sqrt_round_result
-
- sqrt_more_prec_large:
- /* Our estimate is too large */
- movl $0x7fffff00,%eax
- jmp sqrt_round_result
|