wm_shrx.S 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. .file "wm_shrx.S"
  2. /*---------------------------------------------------------------------------+
  3. | wm_shrx.S |
  4. | |
  5. | 64 bit right shift functions |
  6. | |
  7. | Copyright (C) 1992,1995 |
  8. | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
  9. | Australia. E-mail billm@jacobi.maths.monash.edu.au |
  10. | |
  11. | Call from C as: |
  12. | unsigned FPU_shrx(void *arg1, unsigned arg2) |
  13. | and |
  14. | unsigned FPU_shrxs(void *arg1, unsigned arg2) |
  15. | |
  16. +---------------------------------------------------------------------------*/
  17. #include "fpu_emu.h"
  18. .text
  /*---------------------------------------------------------------------------+
   |  unsigned FPU_shrx(void *arg1, unsigned arg2)                             |
   |                                                                           |
   |  Extended shift right; quickest when the shift count is small.            |
   |  arg1 points at a 64 bit quantity stored as two 32 bit words, least       |
   |  significant first.  It is shifted right by arg2 bits as the upper        |
   |  part of a 96 bit quantity whose low 32 bits live in eax:                 |
   |        [  64 bit arg  ][  eax  ]                                          |
   |         shift right --------->                                            |
   |  eax starts out as 0.  The shifted 64 bits are stored back through        |
   |  arg1 and the 32 bits shifted into eax are the return value.              |
   |  A count of 96 or more clears both the arg and eax.                       |
   +---------------------------------------------------------------------------*/
  ENTRY(FPU_shrx)
          pushl   %ebp
          movl    %esp,%ebp
          pushl   %esi
          movl    PARAM1,%esi             /* esi -> the 64 bit arg */
          movl    PARAM2,%ecx             /* ecx = shift count */
          cmpl    $32,%ecx                /* shrd only handles counts 0..31 */
          jae     .Lshrx_32_or_more

  /* Shift by [0..31] bits; this is the only path that needs ebx */
          pushl   %ebx
          xorl    %eax,%eax               /* extension starts out empty */
          movl    (%esi),%ebx             /* lsl */
          movl    4(%esi),%edx            /* msl */
          shrdl   %cl,%ebx,%eax           /* low bits of lsl drop into eax */
          shrdl   %cl,%edx,%ebx           /* low bits of msl drop into lsl */
          shrl    %cl,%edx
          movl    %ebx,(%esi)
          movl    %edx,4(%esi)
          popl    %ebx
          popl    %esi
          leave
          ret

  .Lshrx_32_or_more:
          cmpl    $64,%ecx
          jae     .Lshrx_64_or_more

  /* Shift by [32..63] bits: every word moves down one place, then the
     remaining (count - 32) bits are shifted */
          subb    $32,%cl
          movl    (%esi),%eax             /* lsl feeds the extension */
          movl    4(%esi),%edx            /* msl feeds the new lsl */
          shrdl   %cl,%edx,%eax
          shrl    %cl,%edx
          movl    %edx,(%esi)
          movl    $0,4(%esi)              /* nothing is left in the msl */
          popl    %esi
          leave
          ret

  .Lshrx_64_or_more:
          cmpl    $96,%ecx
          jae     .Lshrx_96_or_more

  /* Shift by [64..95] bits: only msl bits can still land in eax */
          subb    $64,%cl
          xorl    %edx,%edx
          movl    4(%esi),%eax            /* msl */
          shrl    %cl,%eax
          movl    %edx,(%esi)             /* the whole arg becomes zero */
          movl    %edx,4(%esi)
          popl    %esi
          leave
          ret

  .Lshrx_96_or_more:
  /* Shift by [96..inf) bits: everything has been shifted away */
          xorl    %eax,%eax
          movl    %eax,(%esi)
          movl    %eax,4(%esi)
          popl    %esi
          leave
          ret
  /*---------------------------------------------------------------------------+
   |  unsigned FPU_shrxs(void *arg1, unsigned arg2)                            |
   |                                                                           |
   |  Extended shift right with a sticky flag; the straight-line path is       |
   |  the one for counts of 32..63 (small floating point integers).            |
   |  arg1 points at a 64 bit quantity stored as two 32 bit words, least       |
   |  significant first.  It is shifted right by arg2 bits as the upper        |
   |  part of a 96 bit quantity whose low 32 bits live in eax:                 |
   |        [  64 bit arg  ][  eax  ]                                          |
   |         shift right --------->                                            |
   |  eax starts out as 0.  The shifted 64 bits are stored back through        |
   |  arg1.  In the returned eax the low 8 bits do not hold shifted data:      |
   |  al is 0x01 if any bit shifted out of the arg, other than the first       |
   |  (most significant) one, is set, and 0x00 otherwise.                      |
   |  For counts of 96 or more al reports whether the arg was non-zero.        |
   +---------------------------------------------------------------------------*/
  ENTRY(FPU_shrxs)
          pushl   %ebp
          movl    %esp,%ebp
          pushl   %esi
          pushl   %ebx
          movl    PARAM1,%esi             /* esi -> the 64 bit arg */
          movl    PARAM2,%ecx             /* ecx = shift count */
          cmpl    $64,%ecx                /* counts of 64 and up go elsewhere */
          jae     .Lshrxs_64_or_more

          cmpl    $32,%ecx                /* shrd only handles counts 0..31 */
          jb      .Lshrxs_below_32

  /* No jump has been taken to reach this point, on the assumption that
     small integers are the most common requirement */
  /* Shift by [32..63] bits */
          subb    $32,%cl
          movl    (%esi),%eax             /* lsl */
          movl    4(%esi),%edx            /* msl */
          xorl    %ebx,%ebx
          shrdl   %cl,%eax,%ebx           /* ebx = bits falling below eax */
          shrdl   %cl,%edx,%eax           /* eax = the extension */
          shrl    %cl,%edx                /* edx = the new lsl */
          testl   %ebx,%ebx               /* any of those 32 bits set ? */
          setne   %bl
          testl   $0x7fffffff,%eax        /* any of eax apart from bit 31 ? */
          setne   %bh
          testw   %bx,%bx                 /* either of the two answers ? */
          setne   %al
          movl    %edx,(%esi)
          movl    $0,4(%esi)              /* nothing is left in the msl */
          popl    %ebx
          popl    %esi
          leave
          ret

  /* Shift by [0..31] bits */
  .Lshrxs_below_32:
          movl    (%esi),%ebx             /* lsl */
          movl    4(%esi),%edx            /* msl */
          xorl    %eax,%eax               /* extension */
          shrdl   %cl,%ebx,%eax
          shrdl   %cl,%edx,%ebx
          shrl    %cl,%edx
          testl   $0x7fffffff,%eax        /* all shifted-out bits are in eax */
          setne   %al
          movl    %ebx,(%esi)
          movl    %edx,4(%esi)
          popl    %ebx
          popl    %esi
          leave
          ret

  /* Shift by [64..95] bits */
  .Lshrxs_64_or_more:
          cmpl    $96,%ecx
          jae     .Lshrxs_96_or_more

          subb    $64,%cl
          movl    (%esi),%ebx             /* lsl */
          movl    4(%esi),%eax            /* msl */
          xorl    %edx,%edx               /* catches what leaves the lsl */
          shrdl   %cl,%ebx,%edx
          shrdl   %cl,%eax,%ebx
          shrl    %cl,%eax                /* eax = the extension */
          orl     %ebx,%edx               /* anything set below eax ? */
          setne   %bl
          testl   $0x7fffffff,%eax        /* any of eax apart from bit 31 ? */
          setne   %bh
          testw   %bx,%bx
          setne   %al
          xorl    %edx,%edx
          movl    %edx,(%esi)             /* the whole arg becomes zero */
          movl    %edx,4(%esi)
          popl    %ebx
          popl    %esi
          leave
          ret

  .Lshrxs_96_or_more:
  /* Shift by [96..inf) bits: al just records whether the arg was non-zero */
          xorl    %eax,%eax
          movl    (%esi),%ebx
          orl     4(%esi),%ebx
          setne   %al
          xorl    %ebx,%ebx
          movl    %ebx,(%esi)
          movl    %ebx,4(%esi)
          popl    %ebx
          popl    %esi
          leave
          ret
  187. ret