multi_arith.h

/* multi_arith.h: multi-precision integer arithmetic functions, needed
   to do extended-precision floating point.

   (c) 1998 David Huggins-Daines.

   Somewhat based on arch/alpha/math-emu/ieee-math.c, which is (c)
   David Mosberger-Tang.

   You may copy, modify, and redistribute this file under the terms of
   the GNU General Public License, version 2, or any later version, at
   your convenience. */

/* Note:

   These are not general multi-precision math routines.  Rather, they
   implement the subset of integer arithmetic that we need in order to
   multiply, divide, and normalize 128-bit unsigned mantissae. */

#ifndef MULTI_ARITH_H
#define MULTI_ARITH_H
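
/* Note (added for clarity): fp_denormalize() shifts the 64-bit mantissa right
   by cnt bits and raises the exponent by the same amount.  The most
   significant of the shifted-out bits are kept in the 8-bit guard byte
   lowmant; anything shifted out beyond that byte is OR-ed into its least
   significant bit, which serves as the sticky bit for later rounding. */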
static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
{
	reg->exp += cnt;

	switch (cnt) {
	case 0 ... 8:
		reg->lowmant = reg->mant.m32[1] << (8 - cnt);
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
				   (reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 9 ... 32:
		reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
				   (reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 33 ... 39:
		asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
			: "m" (reg->mant.m32[0]), "d" (64 - cnt));
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	case 40 ... 71:
		reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
		if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	default:
		reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
		reg->mant.m32[0] = 0;
		reg->mant.m32[1] = 0;
		break;
	}
}
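
/* Note (added for clarity): fp_overnormalize() shifts out leading zero bits
   of the mantissa.  bfffo locates the first set bit, the 64-bit mantissa is
   shifted left so that bit becomes the most significant one, and the shift
   count is returned, presumably so the caller can lower the exponent to
   match. */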
static inline int fp_overnormalize(struct fp_ext *reg)
{
	int shift;

	if (reg->mant.m32[0]) {
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[0]));
		reg->mant.m32[0] = (reg->mant.m32[0] << shift) | (reg->mant.m32[1] >> (32 - shift));
		reg->mant.m32[1] = (reg->mant.m32[1] << shift);
	} else {
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[1]));
		reg->mant.m32[0] = (reg->mant.m32[1] << shift);
		reg->mant.m32[1] = 0;
		shift += 32;
	}

	return shift;
}

static inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
{
	int carry;

	/* we assume here that gcc only inserts a move and a clr instruction */
	asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
		: "g,d" (src->lowmant), "0,0" (dest->lowmant));
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
		: "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]));
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
		: "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
	asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));

	return carry;
}
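
/* Note (added for clarity): fp_addcarry() handles a carry out of the top of
   the mantissa, e.g. from fp_addmant() above.  It increments the exponent and
   shifts the mantissa right one bit, re-inserting the carried-out 1 as the
   new leading bit; the bit shifted out at the bottom moves to the top of the
   guard byte, whose previous contents collapse into the sticky bit.  If the
   exponent reaches 0x7fff, overflow is signalled instead (plus inexactness if
   nonzero mantissa bits are discarded), the mantissa is cleared and 0 is
   returned; otherwise the function returns 1. */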
static inline int fp_addcarry(struct fp_ext *reg)
{
	if (++reg->exp == 0x7fff) {
		if (reg->mant.m64)
			fp_set_sr(FPSR_EXC_INEX2);
		reg->mant.m64 = 0;
		fp_set_sr(FPSR_EXC_OVFL);
		return 0;
	}
	reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ? 1 : 0);
	reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
			   (reg->mant.m32[0] << 31);
	reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;

	return 1;
}

static inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
			      struct fp_ext *src2)
{
	/* we assume here that gcc only inserts a move and a clr instruction */
	asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
		: "g,d" (src2->lowmant), "0,0" (src1->lowmant));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
		: "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
		: "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
}

#define fp_mul64(desth, destl, src1, src2) ({			\
	asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth)	\
		: "dm" (src1), "0" (src2));			\
})

#define fp_div64(quot, rem, srch, srcl, div)			\
	asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem)	\
		: "dm" (div), "1" (srch), "0" (srcl))

#define fp_add64(dest1, dest2, src1, src2) ({			\
	asm ("add.l %1,%0" : "=d,dm" (dest2)			\
		: "dm,d" (src2), "0,0" (dest2));		\
	asm ("addx.l %1,%0" : "=d" (dest1)			\
		: "d" (src1), "0" (dest1));			\
})

#define fp_addx96(dest, src) ({					\
	/* we assume here that gcc only inserts a move and a clr instruction */ \
	asm volatile ("add.l %1,%0" : "=d,g" (dest->m32[2])	\
		: "g,d" (temp.m32[1]), "0,0" (dest->m32[2]));	\
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[1])	\
		: "d" (temp.m32[0]), "0" (dest->m32[1]));	\
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[0])	\
		: "d" (0), "0" (dest->m32[0]));			\
})

#define fp_sub64(dest, src) ({					\
	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[1])		\
		: "dm,d" (src.m32[1]), "0,0" (dest.m32[1]));	\
	asm ("subx.l %1,%0" : "=d" (dest.m32[0])		\
		: "d" (src.m32[0]), "0" (dest.m32[0]));		\
})

#define fp_sub96c(dest, srch, srcm, srcl) ({			\
	char carry;						\
	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[2])		\
		: "dm,d" (srcl), "0,0" (dest.m32[2]));		\
	asm ("subx.l %1,%0" : "=d" (dest.m32[1])		\
		: "d" (srcm), "0" (dest.m32[1]));		\
	asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0])	\
		: "d" (srch), "1" (dest.m32[0]));		\
	carry;							\
})
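
/* Note (added for clarity): fp_multiplymant() builds the 128-bit product of
   two 64-bit mantissae from four 32x32->64 partial products (schoolbook
   multiplication).  Splitting each operand into 32-bit halves,
   a = a1*2^32 + a0 and b = b1*2^32 + b0:

	a * b = a1*b1*2^64 + (a1*b0 + a0*b1)*2^32 + a0*b0

   The outer products a1*b1 and a0*b0 fill the high and low halves of dest
   directly; the two cross products are then added in at a 32-bit offset,
   with carry propagation, via fp_addx96(). */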
static inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
				   struct fp_ext *src2)
{
	union fp_mant64 temp;

	fp_mul64(dest->m32[0], dest->m32[1], src1->mant.m32[0], src2->mant.m32[0]);
	fp_mul64(dest->m32[2], dest->m32[3], src1->mant.m32[1], src2->mant.m32[1]);

	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[0], src2->mant.m32[1]);
	fp_addx96(dest, temp);

	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[1], src2->mant.m32[0]);
	fp_addx96(dest, temp);
}

static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
				 struct fp_ext *div)
{
	union fp_mant128 tmp;
	union fp_mant64 tmp64;
	unsigned long *mantp = dest->m32;
	unsigned long fix, rem, first, dummy;
	int i;

	/* the algorithm below requires the dividend (src) to be smaller
	   than div, but both have the high bit set */
	if (src->mant.m64 >= div->mant.m64) {
		fp_sub64(src->mant, div->mant);
		*mantp = 1;
	} else
		*mantp = 0;
	mantp++;

	/* The basic idea behind this algorithm: we can't divide two 64-bit
	   numbers (AB/CD) directly, but we can compute AB/C0.  That quotient
	   is off by a factor of C0/CD, so we multiply the first result by a
	   correction factor to fix it up; after that we have nearly the
	   correct result and only a few corrections are needed. */

	/* C0/CD could be precalculated, but that is a 64-bit division again.
	   We make it a bit easier by first dividing through by C, which
	   gives 10/1D; after a single shift the value fits into 32 bits. */
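
	/* Illustrative example (added note, decimal analogy): to divide by
	   0.47 ("CD") with only a one-digit divisor available, divide by
	   0.40 ("C0") instead.  The quotient then comes out too large by a
	   factor of 0.47/0.40, so multiplying it by the precomputed ratio
	   0.40/0.47 ~= 0.851 ("C0/CD") brings it close to the true result;
	   the correction loop at the bottom of each iteration below absorbs
	   the remaining error. */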
	fix = 0x80000000;
	dummy = div->mant.m32[1] / div->mant.m32[0] + 1;
	dummy = (dummy >> 1) | fix;
	fp_div64(fix, dummy, fix, 0, dummy);
	fix--;

	for (i = 0; i < 3; i++, mantp++) {
		if (src->mant.m32[0] == div->mant.m32[0]) {
			fp_div64(first, rem, 0, src->mant.m32[1], div->mant.m32[0]);

			fp_mul64(*mantp, dummy, first, fix);
			*mantp += fix;
		} else {
			fp_div64(first, rem, src->mant.m32[0], src->mant.m32[1], div->mant.m32[0]);

			fp_mul64(*mantp, dummy, first, fix);
		}

		fp_mul64(tmp.m32[0], tmp.m32[1], div->mant.m32[0], first - *mantp);
		fp_add64(tmp.m32[0], tmp.m32[1], 0, rem);
		tmp.m32[2] = 0;

		fp_mul64(tmp64.m32[0], tmp64.m32[1], *mantp, div->mant.m32[1]);
		fp_sub96c(tmp, 0, tmp64.m32[0], tmp64.m32[1]);

		src->mant.m32[0] = tmp.m32[1];
		src->mant.m32[1] = tmp.m32[2];

		while (!fp_sub96c(tmp, 0, div->mant.m32[0], div->mant.m32[1])) {
			src->mant.m32[0] = tmp.m32[1];
			src->mant.m32[1] = tmp.m32[2];
			*mantp += 1;
		}
	}
}
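
/* Note (added for clarity): fp_putmant128() copies a 128-bit intermediate
   mantissa back into the 64-bit-plus-guard-byte format of struct fp_ext,
   left-shifting it by the given amount first; the switch handles only the
   shift counts 0, 1, 31 and 32 (presumably the only values its callers
   produce).  Any bits that fall below the guard byte are OR-ed into the
   sticky bit. */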
static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
				 int shift)
{
	unsigned long tmp;

	switch (shift) {
	case 0:
		dest->mant.m64 = src->m64[0];
		dest->lowmant = src->m32[2] >> 24;
		if (src->m32[3] || (src->m32[2] << 8))
			dest->lowmant |= 1;
		break;
	case 1:
		asm volatile ("lsl.l #1,%0"
			: "=d" (tmp) : "0" (src->m32[2]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (dest->mant.m32[1]) : "0" (src->m32[1]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (dest->mant.m32[0]) : "0" (src->m32[0]));
		dest->lowmant = tmp >> 24;
		if (src->m32[3] || (tmp << 8))
			dest->lowmant |= 1;
		break;
	case 31:
		asm volatile ("lsr.l #1,%1; roxr.l #1,%0"
			: "=d" (dest->mant.m32[0])
			: "d" (src->m32[0]), "0" (src->m32[1]));
		asm volatile ("roxr.l #1,%0"
			: "=d" (dest->mant.m32[1]) : "0" (src->m32[2]));
		asm volatile ("roxr.l #1,%0"
			: "=d" (tmp) : "0" (src->m32[3]));
		dest->lowmant = tmp >> 24;
		if (src->m32[3] << 7)
			dest->lowmant |= 1;
		break;
	case 32:
		dest->mant.m32[0] = src->m32[1];
		dest->mant.m32[1] = src->m32[2];
		dest->lowmant = src->m32[3] >> 24;
		if (src->m32[3] << 8)
			dest->lowmant |= 1;
		break;
	}
}

#endif	/* MULTI_ARITH_H */