dfmpy.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. /*
  2. * Linux/PA-RISC Project (http://www.parisc-linux.org/)
  3. *
  4. * Floating-point emulation code
  5. * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2, or (at your option)
  10. * any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20. */
  21. /*
  22. * BEGIN_DESC
  23. *
  24. * File:
  25. * @(#) pa/spmath/dfmpy.c $Revision: 1.1 $
  26. *
  27. * Purpose:
  28. * Double Precision Floating-point Multiply
  29. *
  30. * External Interfaces:
  31. * dbl_fmpy(srcptr1,srcptr2,dstptr,status)
  32. *
  33. * Internal Interfaces:
  34. *
  35. * Theory:
  36. * <<please update with a overview of the operation of this file>>
  37. *
  38. * END_DESC
  39. */
  40. #include "float.h"
  41. #include "dbl_float.h"
  42. /*
  43. * Double Precision Floating-point Multiply
  44. */
  45. int
  46. dbl_fmpy(
  47. dbl_floating_point *srcptr1,
  48. dbl_floating_point *srcptr2,
  49. dbl_floating_point *dstptr,
  50. unsigned int *status)
  51. {
  52. register unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2;
  53. register unsigned int opnd3p1, opnd3p2, resultp1, resultp2;
  54. register int dest_exponent, count;
  55. register boolean inexact = FALSE, guardbit = FALSE, stickybit = FALSE;
  56. boolean is_tiny;
  57. Dbl_copyfromptr(srcptr1,opnd1p1,opnd1p2);
  58. Dbl_copyfromptr(srcptr2,opnd2p1,opnd2p2);
  59. /*
  60. * set sign bit of result
  61. */
  62. if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
  63. Dbl_setnegativezerop1(resultp1);
  64. else Dbl_setzerop1(resultp1);
  65. /*
  66. * check first operand for NaN's or infinity
  67. */
  68. if (Dbl_isinfinity_exponent(opnd1p1)) {
  69. if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
  70. if (Dbl_isnotnan(opnd2p1,opnd2p2)) {
  71. if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
  72. /*
  73. * invalid since operands are infinity
  74. * and zero
  75. */
  76. if (Is_invalidtrap_enabled())
  77. return(INVALIDEXCEPTION);
  78. Set_invalidflag();
  79. Dbl_makequietnan(resultp1,resultp2);
  80. Dbl_copytoptr(resultp1,resultp2,dstptr);
  81. return(NOEXCEPTION);
  82. }
  83. /*
  84. * return infinity
  85. */
  86. Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
  87. Dbl_copytoptr(resultp1,resultp2,dstptr);
  88. return(NOEXCEPTION);
  89. }
  90. }
  91. else {
  92. /*
  93. * is NaN; signaling or quiet?
  94. */
  95. if (Dbl_isone_signaling(opnd1p1)) {
  96. /* trap if INVALIDTRAP enabled */
  97. if (Is_invalidtrap_enabled())
  98. return(INVALIDEXCEPTION);
  99. /* make NaN quiet */
  100. Set_invalidflag();
  101. Dbl_set_quiet(opnd1p1);
  102. }
  103. /*
  104. * is second operand a signaling NaN?
  105. */
  106. else if (Dbl_is_signalingnan(opnd2p1)) {
  107. /* trap if INVALIDTRAP enabled */
  108. if (Is_invalidtrap_enabled())
  109. return(INVALIDEXCEPTION);
  110. /* make NaN quiet */
  111. Set_invalidflag();
  112. Dbl_set_quiet(opnd2p1);
  113. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  114. return(NOEXCEPTION);
  115. }
  116. /*
  117. * return quiet NaN
  118. */
  119. Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
  120. return(NOEXCEPTION);
  121. }
  122. }
  123. /*
  124. * check second operand for NaN's or infinity
  125. */
  126. if (Dbl_isinfinity_exponent(opnd2p1)) {
  127. if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
  128. if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
  129. /* invalid since operands are zero & infinity */
  130. if (Is_invalidtrap_enabled())
  131. return(INVALIDEXCEPTION);
  132. Set_invalidflag();
  133. Dbl_makequietnan(opnd2p1,opnd2p2);
  134. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  135. return(NOEXCEPTION);
  136. }
  137. /*
  138. * return infinity
  139. */
  140. Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
  141. Dbl_copytoptr(resultp1,resultp2,dstptr);
  142. return(NOEXCEPTION);
  143. }
  144. /*
  145. * is NaN; signaling or quiet?
  146. */
  147. if (Dbl_isone_signaling(opnd2p1)) {
  148. /* trap if INVALIDTRAP enabled */
  149. if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
  150. /* make NaN quiet */
  151. Set_invalidflag();
  152. Dbl_set_quiet(opnd2p1);
  153. }
  154. /*
  155. * return quiet NaN
  156. */
  157. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  158. return(NOEXCEPTION);
  159. }
  160. /*
  161. * Generate exponent
  162. */
  163. dest_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) -DBL_BIAS;
  164. /*
  165. * Generate mantissa
  166. */
  167. if (Dbl_isnotzero_exponent(opnd1p1)) {
  168. /* set hidden bit */
  169. Dbl_clear_signexponent_set_hidden(opnd1p1);
  170. }
  171. else {
  172. /* check for zero */
  173. if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
  174. Dbl_setzero_exponentmantissa(resultp1,resultp2);
  175. Dbl_copytoptr(resultp1,resultp2,dstptr);
  176. return(NOEXCEPTION);
  177. }
  178. /* is denormalized, adjust exponent */
  179. Dbl_clear_signexponent(opnd1p1);
  180. Dbl_leftshiftby1(opnd1p1,opnd1p2);
  181. Dbl_normalize(opnd1p1,opnd1p2,dest_exponent);
  182. }
  183. /* opnd2 needs to have hidden bit set with msb in hidden bit */
  184. if (Dbl_isnotzero_exponent(opnd2p1)) {
  185. Dbl_clear_signexponent_set_hidden(opnd2p1);
  186. }
  187. else {
  188. /* check for zero */
  189. if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
  190. Dbl_setzero_exponentmantissa(resultp1,resultp2);
  191. Dbl_copytoptr(resultp1,resultp2,dstptr);
  192. return(NOEXCEPTION);
  193. }
  194. /* is denormalized; want to normalize */
  195. Dbl_clear_signexponent(opnd2p1);
  196. Dbl_leftshiftby1(opnd2p1,opnd2p2);
  197. Dbl_normalize(opnd2p1,opnd2p2,dest_exponent);
  198. }
  199. /* Multiply two source mantissas together */
  200. /* make room for guard bits */
  201. Dbl_leftshiftby7(opnd2p1,opnd2p2);
  202. Dbl_setzero(opnd3p1,opnd3p2);
  203. /*
  204. * Four bits at a time are inspected in each loop, and a
  205. * simple shift and add multiply algorithm is used.
  206. */
  207. for (count=1;count<=DBL_P;count+=4) {
  208. stickybit |= Dlow4p2(opnd3p2);
  209. Dbl_rightshiftby4(opnd3p1,opnd3p2);
  210. if (Dbit28p2(opnd1p2)) {
  211. /* Twoword_add should be an ADDC followed by an ADD. */
  212. Twoword_add(opnd3p1, opnd3p2, opnd2p1<<3 | opnd2p2>>29,
  213. opnd2p2<<3);
  214. }
  215. if (Dbit29p2(opnd1p2)) {
  216. Twoword_add(opnd3p1, opnd3p2, opnd2p1<<2 | opnd2p2>>30,
  217. opnd2p2<<2);
  218. }
  219. if (Dbit30p2(opnd1p2)) {
  220. Twoword_add(opnd3p1, opnd3p2, opnd2p1<<1 | opnd2p2>>31,
  221. opnd2p2<<1);
  222. }
  223. if (Dbit31p2(opnd1p2)) {
  224. Twoword_add(opnd3p1, opnd3p2, opnd2p1, opnd2p2);
  225. }
  226. Dbl_rightshiftby4(opnd1p1,opnd1p2);
  227. }
  228. if (Dbit3p1(opnd3p1)==0) {
  229. Dbl_leftshiftby1(opnd3p1,opnd3p2);
  230. }
  231. else {
  232. /* result mantissa >= 2. */
  233. dest_exponent++;
  234. }
  235. /* check for denormalized result */
  236. while (Dbit3p1(opnd3p1)==0) {
  237. Dbl_leftshiftby1(opnd3p1,opnd3p2);
  238. dest_exponent--;
  239. }
  240. /*
  241. * check for guard, sticky and inexact bits
  242. */
  243. stickybit |= Dallp2(opnd3p2) << 25;
  244. guardbit = (Dallp2(opnd3p2) << 24) >> 31;
  245. inexact = guardbit | stickybit;
  246. /* align result mantissa */
  247. Dbl_rightshiftby8(opnd3p1,opnd3p2);
  248. /*
  249. * round result
  250. */
  251. if (inexact && (dest_exponent>0 || Is_underflowtrap_enabled())) {
  252. Dbl_clear_signexponent(opnd3p1);
  253. switch (Rounding_mode()) {
  254. case ROUNDPLUS:
  255. if (Dbl_iszero_sign(resultp1))
  256. Dbl_increment(opnd3p1,opnd3p2);
  257. break;
  258. case ROUNDMINUS:
  259. if (Dbl_isone_sign(resultp1))
  260. Dbl_increment(opnd3p1,opnd3p2);
  261. break;
  262. case ROUNDNEAREST:
  263. if (guardbit) {
  264. if (stickybit || Dbl_isone_lowmantissap2(opnd3p2))
  265. Dbl_increment(opnd3p1,opnd3p2);
  266. }
  267. }
  268. if (Dbl_isone_hidden(opnd3p1)) dest_exponent++;
  269. }
  270. Dbl_set_mantissa(resultp1,resultp2,opnd3p1,opnd3p2);
  271. /*
  272. * Test for overflow
  273. */
  274. if (dest_exponent >= DBL_INFINITY_EXPONENT) {
  275. /* trap if OVERFLOWTRAP enabled */
  276. if (Is_overflowtrap_enabled()) {
  277. /*
  278. * Adjust bias of result
  279. */
  280. Dbl_setwrapped_exponent(resultp1,dest_exponent,ovfl);
  281. Dbl_copytoptr(resultp1,resultp2,dstptr);
  282. if (inexact)
  283. if (Is_inexacttrap_enabled())
  284. return (OVERFLOWEXCEPTION | INEXACTEXCEPTION);
  285. else Set_inexactflag();
  286. return (OVERFLOWEXCEPTION);
  287. }
  288. inexact = TRUE;
  289. Set_overflowflag();
  290. /* set result to infinity or largest number */
  291. Dbl_setoverflow(resultp1,resultp2);
  292. }
  293. /*
  294. * Test for underflow
  295. */
  296. else if (dest_exponent <= 0) {
  297. /* trap if UNDERFLOWTRAP enabled */
  298. if (Is_underflowtrap_enabled()) {
  299. /*
  300. * Adjust bias of result
  301. */
  302. Dbl_setwrapped_exponent(resultp1,dest_exponent,unfl);
  303. Dbl_copytoptr(resultp1,resultp2,dstptr);
  304. if (inexact)
  305. if (Is_inexacttrap_enabled())
  306. return (UNDERFLOWEXCEPTION | INEXACTEXCEPTION);
  307. else Set_inexactflag();
  308. return (UNDERFLOWEXCEPTION);
  309. }
  310. /* Determine if should set underflow flag */
  311. is_tiny = TRUE;
  312. if (dest_exponent == 0 && inexact) {
  313. switch (Rounding_mode()) {
  314. case ROUNDPLUS:
  315. if (Dbl_iszero_sign(resultp1)) {
  316. Dbl_increment(opnd3p1,opnd3p2);
  317. if (Dbl_isone_hiddenoverflow(opnd3p1))
  318. is_tiny = FALSE;
  319. Dbl_decrement(opnd3p1,opnd3p2);
  320. }
  321. break;
  322. case ROUNDMINUS:
  323. if (Dbl_isone_sign(resultp1)) {
  324. Dbl_increment(opnd3p1,opnd3p2);
  325. if (Dbl_isone_hiddenoverflow(opnd3p1))
  326. is_tiny = FALSE;
  327. Dbl_decrement(opnd3p1,opnd3p2);
  328. }
  329. break;
  330. case ROUNDNEAREST:
  331. if (guardbit && (stickybit ||
  332. Dbl_isone_lowmantissap2(opnd3p2))) {
  333. Dbl_increment(opnd3p1,opnd3p2);
  334. if (Dbl_isone_hiddenoverflow(opnd3p1))
  335. is_tiny = FALSE;
  336. Dbl_decrement(opnd3p1,opnd3p2);
  337. }
  338. break;
  339. }
  340. }
  341. /*
  342. * denormalize result or set to signed zero
  343. */
  344. stickybit = inexact;
  345. Dbl_denormalize(opnd3p1,opnd3p2,dest_exponent,guardbit,
  346. stickybit,inexact);
  347. /* return zero or smallest number */
  348. if (inexact) {
  349. switch (Rounding_mode()) {
  350. case ROUNDPLUS:
  351. if (Dbl_iszero_sign(resultp1)) {
  352. Dbl_increment(opnd3p1,opnd3p2);
  353. }
  354. break;
  355. case ROUNDMINUS:
  356. if (Dbl_isone_sign(resultp1)) {
  357. Dbl_increment(opnd3p1,opnd3p2);
  358. }
  359. break;
  360. case ROUNDNEAREST:
  361. if (guardbit && (stickybit ||
  362. Dbl_isone_lowmantissap2(opnd3p2))) {
  363. Dbl_increment(opnd3p1,opnd3p2);
  364. }
  365. break;
  366. }
  367. if (is_tiny) Set_underflowflag();
  368. }
  369. Dbl_set_exponentmantissa(resultp1,resultp2,opnd3p1,opnd3p2);
  370. }
  371. else Dbl_set_exponent(resultp1,dest_exponent);
  372. /* check for inexact */
  373. Dbl_copytoptr(resultp1,resultp2,dstptr);
  374. if (inexact) {
  375. if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION);
  376. else Set_inexactflag();
  377. }
  378. return(NOEXCEPTION);
  379. }