wm_sqrt.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. .file "wm_sqrt.S"
  2. /*---------------------------------------------------------------------------+
  3. | wm_sqrt.S |
  4. | |
  5. | Fixed point arithmetic square root evaluation. |
  6. | |
  7. | Copyright (C) 1992,1993,1995,1997 |
  8. | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
  9. | Australia. E-mail billm@suburbia.net |
  10. | |
  11. | Call from C as: |
  12. | int wm_sqrt(FPU_REG *n, unsigned int control_word) |
  13. | |
  14. +---------------------------------------------------------------------------*/
  15. /*---------------------------------------------------------------------------+
  16. | wm_sqrt(FPU_REG *n, unsigned int control_word) |
  17. | returns the square root of n in n. |
  18. | |
  19. | Use Newton's method to compute the square root of a number, which must |
  20. | be in the range [1.0 .. 4.0), to 64 bits accuracy. |
  21. | Does not check the sign or tag of the argument. |
  22. | Sets the exponent, but not the sign or tag of the result. |
  23. | |
  24. | The guess is kept in %esi:%edi |
  25. +---------------------------------------------------------------------------*/
  26. #include "exception.h"
  27. #include "fpu_emu.h"
  28. #ifndef NON_REENTRANT_FPU
  29. /* Local storage on the stack: */
  30. #define FPU_accum_3 -4(%ebp) /* ms word */
  31. #define FPU_accum_2 -8(%ebp)
  32. #define FPU_accum_1 -12(%ebp)
  33. #define FPU_accum_0 -16(%ebp)
  34. /*
  35. * The de-normalised argument:
  36. * sq_2 sq_1 sq_0
  37. * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
  38. * ^ binary point here
  39. */
  40. #define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
  41. #define FPU_fsqrt_arg_1 -24(%ebp)
  42. #define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
  43. #else
  44. /* Local storage in a static area: */
  45. .data
  46. .align 4,0
  47. FPU_accum_3:
  48. .long 0 /* ms word */
  49. FPU_accum_2:
  50. .long 0
  51. FPU_accum_1:
  52. .long 0
  53. FPU_accum_0:
  54. .long 0
  55. /* The de-normalised argument:
  56. sq_2 sq_1 sq_0
  57. b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
  58. ^ binary point here
  59. */
  60. FPU_fsqrt_arg_2:
  61. .long 0 /* ms word */
  62. FPU_fsqrt_arg_1:
  63. .long 0
  64. FPU_fsqrt_arg_0:
  65. .long 0 /* ls word, at most the ms bit is set */
  66. #endif /* NON_REENTRANT_FPU */
  67. .text
  68. ENTRY(wm_sqrt)
  69. pushl %ebp
  70. movl %esp,%ebp
  71. #ifndef NON_REENTRANT_FPU
  72. subl $28,%esp
  73. #endif /* NON_REENTRANT_FPU */
  74. pushl %esi
  75. pushl %edi
  76. pushl %ebx
  77. movl PARAM1,%esi
  78. movl SIGH(%esi),%eax
  79. movl SIGL(%esi),%ecx
  80. xorl %edx,%edx
  81. /* We use a rough linear estimate for the first guess.. */
  82. cmpw EXP_BIAS,EXP(%esi)
  83. jnz sqrt_arg_ge_2
  84. shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
  85. rcrl $1,%ecx
  86. rcrl $1,%edx
  87. sqrt_arg_ge_2:
  88. /* From here on, n is never accessed directly again until it is
  89. replaced by the answer. */
  90. movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
  91. movl %ecx,FPU_fsqrt_arg_1
  92. movl %edx,FPU_fsqrt_arg_0
  93. /* Make a linear first estimate */
  94. shrl $1,%eax
  95. addl $0x40000000,%eax
  96. movl $0xaaaaaaaa,%ecx
  97. mull %ecx
  98. shll %edx /* max result was 7fff... */
  99. testl $0x80000000,%edx /* but min was 3fff... */
  100. jnz sqrt_prelim_no_adjust
  101. movl $0x80000000,%edx /* round up */
  102. sqrt_prelim_no_adjust:
  103. movl %edx,%esi /* Our first guess */
  104. /* We have now computed (approx) (2 + x) / 3, which forms the basis
  105. for a few iterations of Newton's method */
  106. movl FPU_fsqrt_arg_2,%ecx /* ms word */
  107. /*
  108. * From our initial estimate, three iterations are enough to get us
  109. * to 30 bits or so. This will then allow two iterations at better
  110. * precision to complete the process.
  111. */
  112. /* Compute (g + n/g)/2 at each iteration (g is the guess). */
  113. shrl %ecx /* Doing this first will prevent a divide */
  114. /* overflow later. */
  115. movl %ecx,%edx /* msw of the arg / 2 */
  116. divl %esi /* current estimate */
  117. shrl %esi /* divide by 2 */
  118. addl %eax,%esi /* the new estimate */
  119. movl %ecx,%edx
  120. divl %esi
  121. shrl %esi
  122. addl %eax,%esi
  123. movl %ecx,%edx
  124. divl %esi
  125. shrl %esi
  126. addl %eax,%esi
  127. /*
  128. * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
  129. * we improve it to 60 bits or so.
  130. *
  131. * The strategy from now on is to compute new estimates from
  132. * guess := guess + (n - guess^2) / (2 * guess)
  133. */
  134. /* First, find the square of the guess */
  135. movl %esi,%eax
  136. mull %esi
  137. /* guess^2 now in %edx:%eax */
  138. movl FPU_fsqrt_arg_1,%ecx
  139. subl %ecx,%eax
  140. movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
  141. sbbl %ecx,%edx
  142. jnc sqrt_stage_2_positive
  143. /* Subtraction gives a negative result,
  144. negate the result before division. */
  145. notl %edx
  146. notl %eax
  147. addl $1,%eax
  148. adcl $0,%edx
  149. divl %esi
  150. movl %eax,%ecx
  151. movl %edx,%eax
  152. divl %esi
  153. jmp sqrt_stage_2_finish
  154. sqrt_stage_2_positive:
  155. divl %esi
  156. movl %eax,%ecx
  157. movl %edx,%eax
  158. divl %esi
  159. notl %ecx
  160. notl %eax
  161. addl $1,%eax
  162. adcl $0,%ecx
  163. sqrt_stage_2_finish:
  164. sarl $1,%ecx /* divide by 2 */
  165. rcrl $1,%eax
  166. /* Form the new estimate in %esi:%edi */
  167. movl %eax,%edi
  168. addl %ecx,%esi
  169. jnz sqrt_stage_2_done /* result should be [1..2) */
  170. #ifdef PARANOID
  171. /* It should be possible to get here only if the arg is ffff....ffff */
  172. cmp $0xffffffff,FPU_fsqrt_arg_1
  173. jnz sqrt_stage_2_error
  174. #endif /* PARANOID */
  175. /* The best rounded result. */
  176. xorl %eax,%eax
  177. decl %eax
  178. movl %eax,%edi
  179. movl %eax,%esi
  180. movl $0x7fffffff,%eax
  181. jmp sqrt_round_result
  182. #ifdef PARANOID
  183. sqrt_stage_2_error:
  184. pushl EX_INTERNAL|0x213
  185. call EXCEPTION
  186. #endif /* PARANOID */
  187. sqrt_stage_2_done:
  188. /* Now the square root has been computed to better than 60 bits. */
  189. /* Find the square of the guess. */
  190. movl %edi,%eax /* ls word of guess */
  191. mull %edi
  192. movl %edx,FPU_accum_1
  193. movl %esi,%eax
  194. mull %esi
  195. movl %edx,FPU_accum_3
  196. movl %eax,FPU_accum_2
  197. movl %edi,%eax
  198. mull %esi
  199. addl %eax,FPU_accum_1
  200. adcl %edx,FPU_accum_2
  201. adcl $0,FPU_accum_3
  202. /* movl %esi,%eax */
  203. /* mull %edi */
  204. addl %eax,FPU_accum_1
  205. adcl %edx,FPU_accum_2
  206. adcl $0,FPU_accum_3
  207. /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
  208. movl FPU_fsqrt_arg_0,%eax /* get normalized n */
  209. subl %eax,FPU_accum_1
  210. movl FPU_fsqrt_arg_1,%eax
  211. sbbl %eax,FPU_accum_2
  212. movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
  213. sbbl %eax,FPU_accum_3
  214. jnc sqrt_stage_3_positive
  215. /* Subtraction gives a negative result,
  216. negate the result before division */
  217. notl FPU_accum_1
  218. notl FPU_accum_2
  219. notl FPU_accum_3
  220. addl $1,FPU_accum_1
  221. adcl $0,FPU_accum_2
  222. #ifdef PARANOID
  223. adcl $0,FPU_accum_3 /* This must be zero */
  224. jz sqrt_stage_3_no_error
  225. sqrt_stage_3_error:
  226. pushl EX_INTERNAL|0x207
  227. call EXCEPTION
  228. sqrt_stage_3_no_error:
  229. #endif /* PARANOID */
  230. movl FPU_accum_2,%edx
  231. movl FPU_accum_1,%eax
  232. divl %esi
  233. movl %eax,%ecx
  234. movl %edx,%eax
  235. divl %esi
  236. sarl $1,%ecx /* divide by 2 */
  237. rcrl $1,%eax
  238. /* prepare to round the result */
  239. addl %ecx,%edi
  240. adcl $0,%esi
  241. jmp sqrt_stage_3_finished
  242. sqrt_stage_3_positive:
  243. movl FPU_accum_2,%edx
  244. movl FPU_accum_1,%eax
  245. divl %esi
  246. movl %eax,%ecx
  247. movl %edx,%eax
  248. divl %esi
  249. sarl $1,%ecx /* divide by 2 */
  250. rcrl $1,%eax
  251. /* prepare to round the result */
  252. notl %eax /* Negate the correction term */
  253. notl %ecx
  254. addl $1,%eax
  255. adcl $0,%ecx /* carry here ==> correction == 0 */
  256. adcl $0xffffffff,%esi
  257. addl %ecx,%edi
  258. adcl $0,%esi
  259. sqrt_stage_3_finished:
  260. /*
  261. * The result in %esi:%edi:%esi should be good to about 90 bits here,
  262. * and the rounding information here does not have sufficient accuracy
  263. * in a few rare cases.
  264. */
  265. cmpl $0xffffffe0,%eax
  266. ja sqrt_near_exact_x
  267. cmpl $0x00000020,%eax
  268. jb sqrt_near_exact
  269. cmpl $0x7fffffe0,%eax
  270. jb sqrt_round_result
  271. cmpl $0x80000020,%eax
  272. jb sqrt_get_more_precision
  273. sqrt_round_result:
  274. /* Set up for rounding operations */
  275. movl %eax,%edx
  276. movl %esi,%eax
  277. movl %edi,%ebx
  278. movl PARAM1,%edi
  279. movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
  280. jmp fpu_reg_round
  281. sqrt_near_exact_x:
  282. /* First, the estimate must be rounded up. */
  283. addl $1,%edi
  284. adcl $0,%esi
  285. sqrt_near_exact:
  286. /*
  287. * This is an easy case because x^1/2 is monotonic.
  288. * We need just find the square of our estimate, compare it
  289. * with the argument, and deduce whether our estimate is
  290. * above, below, or exact. We use the fact that the estimate
  291. * is known to be accurate to about 90 bits.
  292. */
  293. movl %edi,%eax /* ls word of guess */
  294. mull %edi
  295. movl %edx,%ebx /* 2nd ls word of square */
  296. movl %eax,%ecx /* ls word of square */
  297. movl %edi,%eax
  298. mull %esi
  299. addl %eax,%ebx
  300. addl %eax,%ebx
  301. #ifdef PARANOID
  302. cmp $0xffffffb0,%ebx
  303. jb sqrt_near_exact_ok
  304. cmp $0x00000050,%ebx
  305. ja sqrt_near_exact_ok
  306. pushl EX_INTERNAL|0x214
  307. call EXCEPTION
  308. sqrt_near_exact_ok:
  309. #endif /* PARANOID */
  310. or %ebx,%ebx
  311. js sqrt_near_exact_small
  312. jnz sqrt_near_exact_large
  313. or %ebx,%edx
  314. jnz sqrt_near_exact_large
  315. /* Our estimate is exactly the right answer */
  316. xorl %eax,%eax
  317. jmp sqrt_round_result
  318. sqrt_near_exact_small:
  319. /* Our estimate is too small */
  320. movl $0x000000ff,%eax
  321. jmp sqrt_round_result
  322. sqrt_near_exact_large:
  323. /* Our estimate is too large, we need to decrement it */
  324. subl $1,%edi
  325. sbbl $0,%esi
  326. movl $0xffffff00,%eax
  327. jmp sqrt_round_result
  328. sqrt_get_more_precision:
  329. /* This case is almost the same as the above, except we start
  330. with an extra bit of precision in the estimate. */
  331. stc /* The extra bit. */
  332. rcll $1,%edi /* Shift the estimate left one bit */
  333. rcll $1,%esi
  334. movl %edi,%eax /* ls word of guess */
  335. mull %edi
  336. movl %edx,%ebx /* 2nd ls word of square */
  337. movl %eax,%ecx /* ls word of square */
  338. movl %edi,%eax
  339. mull %esi
  340. addl %eax,%ebx
  341. addl %eax,%ebx
  342. /* Put our estimate back to its original value */
  343. stc /* The ms bit. */
  344. rcrl $1,%esi /* Shift the estimate left one bit */
  345. rcrl $1,%edi
  346. #ifdef PARANOID
  347. cmp $0xffffff60,%ebx
  348. jb sqrt_more_prec_ok
  349. cmp $0x000000a0,%ebx
  350. ja sqrt_more_prec_ok
  351. pushl EX_INTERNAL|0x215
  352. call EXCEPTION
  353. sqrt_more_prec_ok:
  354. #endif /* PARANOID */
  355. or %ebx,%ebx
  356. js sqrt_more_prec_small
  357. jnz sqrt_more_prec_large
  358. or %ebx,%ecx
  359. jnz sqrt_more_prec_large
  360. /* Our estimate is exactly the right answer */
  361. movl $0x80000000,%eax
  362. jmp sqrt_round_result
  363. sqrt_more_prec_small:
  364. /* Our estimate is too small */
  365. movl $0x800000ff,%eax
  366. jmp sqrt_round_result
  367. sqrt_more_prec_large:
  368. /* Our estimate is too large */
  369. movl $0x7fffff00,%eax
  370. jmp sqrt_round_result