fmpyfadd.c 78 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655
  1. /*
  2. * Linux/PA-RISC Project (http://www.parisc-linux.org/)
  3. *
  4. * Floating-point emulation code
  5. * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2, or (at your option)
  10. * any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20. */
  21. /*
  22. * BEGIN_DESC
  23. *
  24. * File:
  25. * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
  26. *
  27. * Purpose:
  28. * Double Floating-point Multiply Fused Add
  29. * Double Floating-point Multiply Negate Fused Add
  30. * Single Floating-point Multiply Fused Add
  31. * Single Floating-point Multiply Negate Fused Add
  32. *
  33. * External Interfaces:
  34. * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  35. * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  36. * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  37. * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  38. *
  39. * Internal Interfaces:
  40. *
  41. * Theory:
  42. * <<please update with a overview of the operation of this file>>
  43. *
  44. * END_DESC
  45. */
  46. #include "float.h"
  47. #include "sgl_float.h"
  48. #include "dbl_float.h"
  49. /*
  50. * Double Floating-point Multiply Fused Add
  51. */
  52. int
  53. dbl_fmpyfadd(
  54. dbl_floating_point *src1ptr,
  55. dbl_floating_point *src2ptr,
  56. dbl_floating_point *src3ptr,
  57. unsigned int *status,
  58. dbl_floating_point *dstptr)
  59. {
  60. unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
  61. register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
  62. unsigned int rightp1, rightp2, rightp3, rightp4;
  63. unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
  64. register int mpy_exponent, add_exponent, count;
  65. boolean inexact = FALSE, is_tiny = FALSE;
  66. unsigned int signlessleft1, signlessright1, save;
  67. register int result_exponent, diff_exponent;
  68. int sign_save, jumpsize;
  69. Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
  70. Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
  71. Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
  72. /*
  73. * set sign bit of result of multiply
  74. */
  75. if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
  76. Dbl_setnegativezerop1(resultp1);
  77. else Dbl_setzerop1(resultp1);
  78. /*
  79. * Generate multiply exponent
  80. */
  81. mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
  82. /*
  83. * check first operand for NaN's or infinity
  84. */
  85. if (Dbl_isinfinity_exponent(opnd1p1)) {
  86. if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
  87. if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
  88. Dbl_isnotnan(opnd3p1,opnd3p2)) {
  89. if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
  90. /*
  91. * invalid since operands are infinity
  92. * and zero
  93. */
  94. if (Is_invalidtrap_enabled())
  95. return(OPC_2E_INVALIDEXCEPTION);
  96. Set_invalidflag();
  97. Dbl_makequietnan(resultp1,resultp2);
  98. Dbl_copytoptr(resultp1,resultp2,dstptr);
  99. return(NOEXCEPTION);
  100. }
  101. /*
  102. * Check third operand for infinity with a
  103. * sign opposite of the multiply result
  104. */
  105. if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
  106. (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
  107. /*
  108. * invalid since attempting a magnitude
  109. * subtraction of infinities
  110. */
  111. if (Is_invalidtrap_enabled())
  112. return(OPC_2E_INVALIDEXCEPTION);
  113. Set_invalidflag();
  114. Dbl_makequietnan(resultp1,resultp2);
  115. Dbl_copytoptr(resultp1,resultp2,dstptr);
  116. return(NOEXCEPTION);
  117. }
  118. /*
  119. * return infinity
  120. */
  121. Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
  122. Dbl_copytoptr(resultp1,resultp2,dstptr);
  123. return(NOEXCEPTION);
  124. }
  125. }
  126. else {
  127. /*
  128. * is NaN; signaling or quiet?
  129. */
  130. if (Dbl_isone_signaling(opnd1p1)) {
  131. /* trap if INVALIDTRAP enabled */
  132. if (Is_invalidtrap_enabled())
  133. return(OPC_2E_INVALIDEXCEPTION);
  134. /* make NaN quiet */
  135. Set_invalidflag();
  136. Dbl_set_quiet(opnd1p1);
  137. }
  138. /*
  139. * is second operand a signaling NaN?
  140. */
  141. else if (Dbl_is_signalingnan(opnd2p1)) {
  142. /* trap if INVALIDTRAP enabled */
  143. if (Is_invalidtrap_enabled())
  144. return(OPC_2E_INVALIDEXCEPTION);
  145. /* make NaN quiet */
  146. Set_invalidflag();
  147. Dbl_set_quiet(opnd2p1);
  148. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  149. return(NOEXCEPTION);
  150. }
  151. /*
  152. * is third operand a signaling NaN?
  153. */
  154. else if (Dbl_is_signalingnan(opnd3p1)) {
  155. /* trap if INVALIDTRAP enabled */
  156. if (Is_invalidtrap_enabled())
  157. return(OPC_2E_INVALIDEXCEPTION);
  158. /* make NaN quiet */
  159. Set_invalidflag();
  160. Dbl_set_quiet(opnd3p1);
  161. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  162. return(NOEXCEPTION);
  163. }
  164. /*
  165. * return quiet NaN
  166. */
  167. Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
  168. return(NOEXCEPTION);
  169. }
  170. }
  171. /*
  172. * check second operand for NaN's or infinity
  173. */
  174. if (Dbl_isinfinity_exponent(opnd2p1)) {
  175. if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
  176. if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
  177. if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
  178. /*
  179. * invalid since multiply operands are
  180. * zero & infinity
  181. */
  182. if (Is_invalidtrap_enabled())
  183. return(OPC_2E_INVALIDEXCEPTION);
  184. Set_invalidflag();
  185. Dbl_makequietnan(opnd2p1,opnd2p2);
  186. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  187. return(NOEXCEPTION);
  188. }
  189. /*
  190. * Check third operand for infinity with a
  191. * sign opposite of the multiply result
  192. */
  193. if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
  194. (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
  195. /*
  196. * invalid since attempting a magnitude
  197. * subtraction of infinities
  198. */
  199. if (Is_invalidtrap_enabled())
  200. return(OPC_2E_INVALIDEXCEPTION);
  201. Set_invalidflag();
  202. Dbl_makequietnan(resultp1,resultp2);
  203. Dbl_copytoptr(resultp1,resultp2,dstptr);
  204. return(NOEXCEPTION);
  205. }
  206. /*
  207. * return infinity
  208. */
  209. Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
  210. Dbl_copytoptr(resultp1,resultp2,dstptr);
  211. return(NOEXCEPTION);
  212. }
  213. }
  214. else {
  215. /*
  216. * is NaN; signaling or quiet?
  217. */
  218. if (Dbl_isone_signaling(opnd2p1)) {
  219. /* trap if INVALIDTRAP enabled */
  220. if (Is_invalidtrap_enabled())
  221. return(OPC_2E_INVALIDEXCEPTION);
  222. /* make NaN quiet */
  223. Set_invalidflag();
  224. Dbl_set_quiet(opnd2p1);
  225. }
  226. /*
  227. * is third operand a signaling NaN?
  228. */
  229. else if (Dbl_is_signalingnan(opnd3p1)) {
  230. /* trap if INVALIDTRAP enabled */
  231. if (Is_invalidtrap_enabled())
  232. return(OPC_2E_INVALIDEXCEPTION);
  233. /* make NaN quiet */
  234. Set_invalidflag();
  235. Dbl_set_quiet(opnd3p1);
  236. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  237. return(NOEXCEPTION);
  238. }
  239. /*
  240. * return quiet NaN
  241. */
  242. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  243. return(NOEXCEPTION);
  244. }
  245. }
  246. /*
  247. * check third operand for NaN's or infinity
  248. */
  249. if (Dbl_isinfinity_exponent(opnd3p1)) {
  250. if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
  251. /* return infinity */
  252. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  253. return(NOEXCEPTION);
  254. } else {
  255. /*
  256. * is NaN; signaling or quiet?
  257. */
  258. if (Dbl_isone_signaling(opnd3p1)) {
  259. /* trap if INVALIDTRAP enabled */
  260. if (Is_invalidtrap_enabled())
  261. return(OPC_2E_INVALIDEXCEPTION);
  262. /* make NaN quiet */
  263. Set_invalidflag();
  264. Dbl_set_quiet(opnd3p1);
  265. }
  266. /*
  267. * return quiet NaN
  268. */
  269. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  270. return(NOEXCEPTION);
  271. }
  272. }
  273. /*
  274. * Generate multiply mantissa
  275. */
  276. if (Dbl_isnotzero_exponent(opnd1p1)) {
  277. /* set hidden bit */
  278. Dbl_clear_signexponent_set_hidden(opnd1p1);
  279. }
  280. else {
  281. /* check for zero */
  282. if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
  283. /*
  284. * Perform the add opnd3 with zero here.
  285. */
  286. if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
  287. if (Is_rounding_mode(ROUNDMINUS)) {
  288. Dbl_or_signs(opnd3p1,resultp1);
  289. } else {
  290. Dbl_and_signs(opnd3p1,resultp1);
  291. }
  292. }
  293. /*
  294. * Now let's check for trapped underflow case.
  295. */
  296. else if (Dbl_iszero_exponent(opnd3p1) &&
  297. Is_underflowtrap_enabled()) {
  298. /* need to normalize results mantissa */
  299. sign_save = Dbl_signextendedsign(opnd3p1);
  300. result_exponent = 0;
  301. Dbl_leftshiftby1(opnd3p1,opnd3p2);
  302. Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
  303. Dbl_set_sign(opnd3p1,/*using*/sign_save);
  304. Dbl_setwrapped_exponent(opnd3p1,result_exponent,
  305. unfl);
  306. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  307. /* inexact = FALSE */
  308. return(OPC_2E_UNDERFLOWEXCEPTION);
  309. }
  310. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  311. return(NOEXCEPTION);
  312. }
  313. /* is denormalized, adjust exponent */
  314. Dbl_clear_signexponent(opnd1p1);
  315. Dbl_leftshiftby1(opnd1p1,opnd1p2);
  316. Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
  317. }
  318. /* opnd2 needs to have hidden bit set with msb in hidden bit */
  319. if (Dbl_isnotzero_exponent(opnd2p1)) {
  320. Dbl_clear_signexponent_set_hidden(opnd2p1);
  321. }
  322. else {
  323. /* check for zero */
  324. if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
  325. /*
  326. * Perform the add opnd3 with zero here.
  327. */
  328. if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
  329. if (Is_rounding_mode(ROUNDMINUS)) {
  330. Dbl_or_signs(opnd3p1,resultp1);
  331. } else {
  332. Dbl_and_signs(opnd3p1,resultp1);
  333. }
  334. }
  335. /*
  336. * Now let's check for trapped underflow case.
  337. */
  338. else if (Dbl_iszero_exponent(opnd3p1) &&
  339. Is_underflowtrap_enabled()) {
  340. /* need to normalize results mantissa */
  341. sign_save = Dbl_signextendedsign(opnd3p1);
  342. result_exponent = 0;
  343. Dbl_leftshiftby1(opnd3p1,opnd3p2);
  344. Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
  345. Dbl_set_sign(opnd3p1,/*using*/sign_save);
  346. Dbl_setwrapped_exponent(opnd3p1,result_exponent,
  347. unfl);
  348. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  349. /* inexact = FALSE */
  350. return(OPC_2E_UNDERFLOWEXCEPTION);
  351. }
  352. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  353. return(NOEXCEPTION);
  354. }
  355. /* is denormalized; want to normalize */
  356. Dbl_clear_signexponent(opnd2p1);
  357. Dbl_leftshiftby1(opnd2p1,opnd2p2);
  358. Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
  359. }
  360. /* Multiply the first two source mantissas together */
  361. /*
  362. * The intermediate result will be kept in tmpres,
  363. * which needs enough room for 106 bits of mantissa,
  364. * so lets call it a Double extended.
  365. */
  366. Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
  367. /*
  368. * Four bits at a time are inspected in each loop, and a
  369. * simple shift and add multiply algorithm is used.
  370. */
  371. for (count = DBL_P-1; count >= 0; count -= 4) {
  372. Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
  373. if (Dbit28p2(opnd1p2)) {
  374. /* Fourword_add should be an ADD followed by 3 ADDC's */
  375. Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
  376. opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
  377. }
  378. if (Dbit29p2(opnd1p2)) {
  379. Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
  380. opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
  381. }
  382. if (Dbit30p2(opnd1p2)) {
  383. Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
  384. opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
  385. }
  386. if (Dbit31p2(opnd1p2)) {
  387. Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
  388. opnd2p1, opnd2p2, 0, 0);
  389. }
  390. Dbl_rightshiftby4(opnd1p1,opnd1p2);
  391. }
  392. if (Is_dexthiddenoverflow(tmpresp1)) {
  393. /* result mantissa >= 2 (mantissa overflow) */
  394. mpy_exponent++;
  395. Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
  396. }
  397. /*
  398. * Restore the sign of the mpy result which was saved in resultp1.
  399. * The exponent will continue to be kept in mpy_exponent.
  400. */
  401. Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
  402. /*
  403. * No rounding is required, since the result of the multiply
  404. * is exact in the extended format.
  405. */
  406. /*
  407. * Now we are ready to perform the add portion of the operation.
  408. *
  409. * The exponents need to be kept as integers for now, since the
  410. * multiply result might not fit into the exponent field. We
  411. * can't overflow or underflow because of this yet, since the
  412. * add could bring the final result back into range.
  413. */
  414. add_exponent = Dbl_exponent(opnd3p1);
  415. /*
  416. * Check for denormalized or zero add operand.
  417. */
  418. if (add_exponent == 0) {
  419. /* check for zero */
  420. if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
  421. /* right is zero */
  422. /* Left can't be zero and must be result.
  423. *
  424. * The final result is now in tmpres and mpy_exponent,
  425. * and needs to be rounded and squeezed back into
  426. * double precision format from double extended.
  427. */
  428. result_exponent = mpy_exponent;
  429. Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
  430. resultp1,resultp2,resultp3,resultp4);
  431. sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
  432. goto round;
  433. }
  434. /*
  435. * Neither are zeroes.
  436. * Adjust exponent and normalize add operand.
  437. */
  438. sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
  439. Dbl_clear_signexponent(opnd3p1);
  440. Dbl_leftshiftby1(opnd3p1,opnd3p2);
  441. Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
  442. Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
  443. } else {
  444. Dbl_clear_exponent_set_hidden(opnd3p1);
  445. }
  446. /*
  447. * Copy opnd3 to the double extended variable called right.
  448. */
  449. Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
  450. /*
  451. * A zero "save" helps discover equal operands (for later),
  452. * and is used in swapping operands (if needed).
  453. */
  454. Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
  455. /*
  456. * Compare magnitude of operands.
  457. */
  458. Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
  459. Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
  460. if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
  461. Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
  462. /*
  463. * Set the left operand to the larger one by XOR swap.
  464. * First finish the first word "save".
  465. */
  466. Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
  467. Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
  468. Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
  469. rightp2,rightp3,rightp4);
  470. /* also setup exponents used in rest of routine */
  471. diff_exponent = add_exponent - mpy_exponent;
  472. result_exponent = add_exponent;
  473. } else {
  474. /* also setup exponents used in rest of routine */
  475. diff_exponent = mpy_exponent - add_exponent;
  476. result_exponent = mpy_exponent;
  477. }
  478. /* Invariant: left is not smaller than right. */
  479. /*
  480. * Special case alignment of operands that would force alignment
  481. * beyond the extent of the extension. A further optimization
  482. * could special case this but only reduces the path length for
  483. * this infrequent case.
  484. */
  485. if (diff_exponent > DBLEXT_THRESHOLD) {
  486. diff_exponent = DBLEXT_THRESHOLD;
  487. }
  488. /* Align right operand by shifting it to the right */
  489. Dblext_clear_sign(rightp1);
  490. Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
  491. /*shifted by*/diff_exponent);
  492. /* Treat sum and difference of the operands separately. */
  493. if ((int)save < 0) {
  494. /*
  495. * Difference of the two operands. Overflow can occur if the
  496. * multiply overflowed. A borrow can occur out of the hidden
  497. * bit and force a post normalization phase.
  498. */
  499. Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
  500. rightp1,rightp2,rightp3,rightp4,
  501. resultp1,resultp2,resultp3,resultp4);
  502. sign_save = Dbl_signextendedsign(resultp1);
  503. if (Dbl_iszero_hidden(resultp1)) {
  504. /* Handle normalization */
  505. /* A straightforward algorithm would now shift the
  506. * result and extension left until the hidden bit
  507. * becomes one. Not all of the extension bits need
  508. * participate in the shift. Only the two most
  509. * significant bits (round and guard) are needed.
  510. * If only a single shift is needed then the guard
  511. * bit becomes a significant low order bit and the
  512. * extension must participate in the rounding.
  513. * If more than a single shift is needed, then all
  514. * bits to the right of the guard bit are zeros,
  515. * and the guard bit may or may not be zero. */
  516. Dblext_leftshiftby1(resultp1,resultp2,resultp3,
  517. resultp4);
  518. /* Need to check for a zero result. The sign and
  519. * exponent fields have already been zeroed. The more
  520. * efficient test of the full object can be used.
  521. */
  522. if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
  523. /* Must have been "x-x" or "x+(-x)". */
  524. if (Is_rounding_mode(ROUNDMINUS))
  525. Dbl_setone_sign(resultp1);
  526. Dbl_copytoptr(resultp1,resultp2,dstptr);
  527. return(NOEXCEPTION);
  528. }
  529. result_exponent--;
  530. /* Look to see if normalization is finished. */
  531. if (Dbl_isone_hidden(resultp1)) {
  532. /* No further normalization is needed */
  533. goto round;
  534. }
  535. /* Discover first one bit to determine shift amount.
  536. * Use a modified binary search. We have already
  537. * shifted the result one position right and still
  538. * not found a one so the remainder of the extension
  539. * must be zero and simplifies rounding. */
  540. /* Scan bytes */
  541. while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
  542. Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
  543. result_exponent -= 8;
  544. }
  545. /* Now narrow it down to the nibble */
  546. if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
  547. /* The lower nibble contains the
  548. * normalizing one */
  549. Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
  550. result_exponent -= 4;
  551. }
  552. /* Select case where first bit is set (already
  553. * normalized) otherwise select the proper shift. */
  554. jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
  555. if (jumpsize <= 7) switch(jumpsize) {
  556. case 1:
  557. Dblext_leftshiftby3(resultp1,resultp2,resultp3,
  558. resultp4);
  559. result_exponent -= 3;
  560. break;
  561. case 2:
  562. case 3:
  563. Dblext_leftshiftby2(resultp1,resultp2,resultp3,
  564. resultp4);
  565. result_exponent -= 2;
  566. break;
  567. case 4:
  568. case 5:
  569. case 6:
  570. case 7:
  571. Dblext_leftshiftby1(resultp1,resultp2,resultp3,
  572. resultp4);
  573. result_exponent -= 1;
  574. break;
  575. }
  576. } /* end if (hidden...)... */
  577. /* Fall through and round */
  578. } /* end if (save < 0)... */
  579. else {
  580. /* Add magnitudes */
  581. Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
  582. rightp1,rightp2,rightp3,rightp4,
  583. /*to*/resultp1,resultp2,resultp3,resultp4);
  584. sign_save = Dbl_signextendedsign(resultp1);
  585. if (Dbl_isone_hiddenoverflow(resultp1)) {
  586. /* Prenormalization required. */
  587. Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
  588. resultp4);
  589. result_exponent++;
  590. } /* end if hiddenoverflow... */
  591. } /* end else ...add magnitudes... */
  592. /* Round the result. If the extension and lower two words are
  593. * all zeros, then the result is exact. Otherwise round in the
  594. * correct direction. Underflow is possible. If a postnormalization
  595. * is necessary, then the mantissa is all zeros so no shift is needed.
  596. */
  597. round:
  598. if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
  599. Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
  600. result_exponent,is_tiny);
  601. }
  602. Dbl_set_sign(resultp1,/*using*/sign_save);
  603. if (Dblext_isnotzero_mantissap3(resultp3) ||
  604. Dblext_isnotzero_mantissap4(resultp4)) {
  605. inexact = TRUE;
  606. switch(Rounding_mode()) {
  607. case ROUNDNEAREST: /* The default. */
  608. if (Dblext_isone_highp3(resultp3)) {
  609. /* at least 1/2 ulp */
  610. if (Dblext_isnotzero_low31p3(resultp3) ||
  611. Dblext_isnotzero_mantissap4(resultp4) ||
  612. Dblext_isone_lowp2(resultp2)) {
  613. /* either exactly half way and odd or
  614. * more than 1/2ulp */
  615. Dbl_increment(resultp1,resultp2);
  616. }
  617. }
  618. break;
  619. case ROUNDPLUS:
  620. if (Dbl_iszero_sign(resultp1)) {
  621. /* Round up positive results */
  622. Dbl_increment(resultp1,resultp2);
  623. }
  624. break;
  625. case ROUNDMINUS:
  626. if (Dbl_isone_sign(resultp1)) {
  627. /* Round down negative results */
  628. Dbl_increment(resultp1,resultp2);
  629. }
  630. case ROUNDZERO:;
  631. /* truncate is simple */
  632. } /* end switch... */
  633. if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
  634. }
  635. if (result_exponent >= DBL_INFINITY_EXPONENT) {
  636. /* trap if OVERFLOWTRAP enabled */
  637. if (Is_overflowtrap_enabled()) {
  638. /*
  639. * Adjust bias of result
  640. */
  641. Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
  642. Dbl_copytoptr(resultp1,resultp2,dstptr);
  643. if (inexact)
  644. if (Is_inexacttrap_enabled())
  645. return (OPC_2E_OVERFLOWEXCEPTION |
  646. OPC_2E_INEXACTEXCEPTION);
  647. else Set_inexactflag();
  648. return (OPC_2E_OVERFLOWEXCEPTION);
  649. }
  650. inexact = TRUE;
  651. Set_overflowflag();
  652. /* set result to infinity or largest number */
  653. Dbl_setoverflow(resultp1,resultp2);
  654. } else if (result_exponent <= 0) { /* underflow case */
  655. if (Is_underflowtrap_enabled()) {
  656. /*
  657. * Adjust bias of result
  658. */
  659. Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
  660. Dbl_copytoptr(resultp1,resultp2,dstptr);
  661. if (inexact)
  662. if (Is_inexacttrap_enabled())
  663. return (OPC_2E_UNDERFLOWEXCEPTION |
  664. OPC_2E_INEXACTEXCEPTION);
  665. else Set_inexactflag();
  666. return(OPC_2E_UNDERFLOWEXCEPTION);
  667. }
  668. else if (inexact && is_tiny) Set_underflowflag();
  669. }
  670. else Dbl_set_exponent(resultp1,result_exponent);
  671. Dbl_copytoptr(resultp1,resultp2,dstptr);
  672. if (inexact)
  673. if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
  674. else Set_inexactflag();
  675. return(NOEXCEPTION);
  676. }
  677. /*
  678. * Double Floating-point Multiply Negate Fused Add
  679. */
  680. dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  681. dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
  682. unsigned int *status;
  683. {
  684. unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
  685. register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
  686. unsigned int rightp1, rightp2, rightp3, rightp4;
  687. unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
  688. register int mpy_exponent, add_exponent, count;
  689. boolean inexact = FALSE, is_tiny = FALSE;
  690. unsigned int signlessleft1, signlessright1, save;
  691. register int result_exponent, diff_exponent;
  692. int sign_save, jumpsize;
  693. Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
  694. Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
  695. Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
  696. /*
  697. * set sign bit of result of multiply
  698. */
  699. if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
  700. Dbl_setzerop1(resultp1);
  701. else
  702. Dbl_setnegativezerop1(resultp1);
  703. /*
  704. * Generate multiply exponent
  705. */
  706. mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
  707. /*
  708. * check first operand for NaN's or infinity
  709. */
  710. if (Dbl_isinfinity_exponent(opnd1p1)) {
  711. if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
  712. if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
  713. Dbl_isnotnan(opnd3p1,opnd3p2)) {
  714. if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
  715. /*
  716. * invalid since operands are infinity
  717. * and zero
  718. */
  719. if (Is_invalidtrap_enabled())
  720. return(OPC_2E_INVALIDEXCEPTION);
  721. Set_invalidflag();
  722. Dbl_makequietnan(resultp1,resultp2);
  723. Dbl_copytoptr(resultp1,resultp2,dstptr);
  724. return(NOEXCEPTION);
  725. }
  726. /*
  727. * Check third operand for infinity with a
  728. * sign opposite of the multiply result
  729. */
  730. if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
  731. (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
  732. /*
  733. * invalid since attempting a magnitude
  734. * subtraction of infinities
  735. */
  736. if (Is_invalidtrap_enabled())
  737. return(OPC_2E_INVALIDEXCEPTION);
  738. Set_invalidflag();
  739. Dbl_makequietnan(resultp1,resultp2);
  740. Dbl_copytoptr(resultp1,resultp2,dstptr);
  741. return(NOEXCEPTION);
  742. }
  743. /*
  744. * return infinity
  745. */
  746. Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
  747. Dbl_copytoptr(resultp1,resultp2,dstptr);
  748. return(NOEXCEPTION);
  749. }
  750. }
  751. else {
  752. /*
  753. * is NaN; signaling or quiet?
  754. */
  755. if (Dbl_isone_signaling(opnd1p1)) {
  756. /* trap if INVALIDTRAP enabled */
  757. if (Is_invalidtrap_enabled())
  758. return(OPC_2E_INVALIDEXCEPTION);
  759. /* make NaN quiet */
  760. Set_invalidflag();
  761. Dbl_set_quiet(opnd1p1);
  762. }
  763. /*
  764. * is second operand a signaling NaN?
  765. */
  766. else if (Dbl_is_signalingnan(opnd2p1)) {
  767. /* trap if INVALIDTRAP enabled */
  768. if (Is_invalidtrap_enabled())
  769. return(OPC_2E_INVALIDEXCEPTION);
  770. /* make NaN quiet */
  771. Set_invalidflag();
  772. Dbl_set_quiet(opnd2p1);
  773. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  774. return(NOEXCEPTION);
  775. }
  776. /*
  777. * is third operand a signaling NaN?
  778. */
  779. else if (Dbl_is_signalingnan(opnd3p1)) {
  780. /* trap if INVALIDTRAP enabled */
  781. if (Is_invalidtrap_enabled())
  782. return(OPC_2E_INVALIDEXCEPTION);
  783. /* make NaN quiet */
  784. Set_invalidflag();
  785. Dbl_set_quiet(opnd3p1);
  786. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  787. return(NOEXCEPTION);
  788. }
  789. /*
  790. * return quiet NaN
  791. */
  792. Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
  793. return(NOEXCEPTION);
  794. }
  795. }
  796. /*
  797. * check second operand for NaN's or infinity
  798. */
  799. if (Dbl_isinfinity_exponent(opnd2p1)) {
  800. if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
  801. if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
  802. if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
  803. /*
  804. * invalid since multiply operands are
  805. * zero & infinity
  806. */
  807. if (Is_invalidtrap_enabled())
  808. return(OPC_2E_INVALIDEXCEPTION);
  809. Set_invalidflag();
  810. Dbl_makequietnan(opnd2p1,opnd2p2);
  811. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  812. return(NOEXCEPTION);
  813. }
  814. /*
  815. * Check third operand for infinity with a
  816. * sign opposite of the multiply result
  817. */
  818. if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
  819. (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
  820. /*
  821. * invalid since attempting a magnitude
  822. * subtraction of infinities
  823. */
  824. if (Is_invalidtrap_enabled())
  825. return(OPC_2E_INVALIDEXCEPTION);
  826. Set_invalidflag();
  827. Dbl_makequietnan(resultp1,resultp2);
  828. Dbl_copytoptr(resultp1,resultp2,dstptr);
  829. return(NOEXCEPTION);
  830. }
  831. /*
  832. * return infinity
  833. */
  834. Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
  835. Dbl_copytoptr(resultp1,resultp2,dstptr);
  836. return(NOEXCEPTION);
  837. }
  838. }
  839. else {
  840. /*
  841. * is NaN; signaling or quiet?
  842. */
  843. if (Dbl_isone_signaling(opnd2p1)) {
  844. /* trap if INVALIDTRAP enabled */
  845. if (Is_invalidtrap_enabled())
  846. return(OPC_2E_INVALIDEXCEPTION);
  847. /* make NaN quiet */
  848. Set_invalidflag();
  849. Dbl_set_quiet(opnd2p1);
  850. }
  851. /*
  852. * is third operand a signaling NaN?
  853. */
  854. else if (Dbl_is_signalingnan(opnd3p1)) {
  855. /* trap if INVALIDTRAP enabled */
  856. if (Is_invalidtrap_enabled())
  857. return(OPC_2E_INVALIDEXCEPTION);
  858. /* make NaN quiet */
  859. Set_invalidflag();
  860. Dbl_set_quiet(opnd3p1);
  861. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  862. return(NOEXCEPTION);
  863. }
  864. /*
  865. * return quiet NaN
  866. */
  867. Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
  868. return(NOEXCEPTION);
  869. }
  870. }
  871. /*
  872. * check third operand for NaN's or infinity
  873. */
  874. if (Dbl_isinfinity_exponent(opnd3p1)) {
  875. if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
  876. /* return infinity */
  877. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  878. return(NOEXCEPTION);
  879. } else {
  880. /*
  881. * is NaN; signaling or quiet?
  882. */
  883. if (Dbl_isone_signaling(opnd3p1)) {
  884. /* trap if INVALIDTRAP enabled */
  885. if (Is_invalidtrap_enabled())
  886. return(OPC_2E_INVALIDEXCEPTION);
  887. /* make NaN quiet */
  888. Set_invalidflag();
  889. Dbl_set_quiet(opnd3p1);
  890. }
  891. /*
  892. * return quiet NaN
  893. */
  894. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  895. return(NOEXCEPTION);
  896. }
  897. }
  898. /*
  899. * Generate multiply mantissa
  900. */
  901. if (Dbl_isnotzero_exponent(opnd1p1)) {
  902. /* set hidden bit */
  903. Dbl_clear_signexponent_set_hidden(opnd1p1);
  904. }
  905. else {
  906. /* check for zero */
  907. if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
  908. /*
  909. * Perform the add opnd3 with zero here.
  910. */
  911. if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
  912. if (Is_rounding_mode(ROUNDMINUS)) {
  913. Dbl_or_signs(opnd3p1,resultp1);
  914. } else {
  915. Dbl_and_signs(opnd3p1,resultp1);
  916. }
  917. }
  918. /*
  919. * Now let's check for trapped underflow case.
  920. */
  921. else if (Dbl_iszero_exponent(opnd3p1) &&
  922. Is_underflowtrap_enabled()) {
  923. /* need to normalize results mantissa */
  924. sign_save = Dbl_signextendedsign(opnd3p1);
  925. result_exponent = 0;
  926. Dbl_leftshiftby1(opnd3p1,opnd3p2);
  927. Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
  928. Dbl_set_sign(opnd3p1,/*using*/sign_save);
  929. Dbl_setwrapped_exponent(opnd3p1,result_exponent,
  930. unfl);
  931. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  932. /* inexact = FALSE */
  933. return(OPC_2E_UNDERFLOWEXCEPTION);
  934. }
  935. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  936. return(NOEXCEPTION);
  937. }
  938. /* is denormalized, adjust exponent */
  939. Dbl_clear_signexponent(opnd1p1);
  940. Dbl_leftshiftby1(opnd1p1,opnd1p2);
  941. Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
  942. }
  943. /* opnd2 needs to have hidden bit set with msb in hidden bit */
  944. if (Dbl_isnotzero_exponent(opnd2p1)) {
  945. Dbl_clear_signexponent_set_hidden(opnd2p1);
  946. }
  947. else {
  948. /* check for zero */
  949. if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
  950. /*
  951. * Perform the add opnd3 with zero here.
  952. */
  953. if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
  954. if (Is_rounding_mode(ROUNDMINUS)) {
  955. Dbl_or_signs(opnd3p1,resultp1);
  956. } else {
  957. Dbl_and_signs(opnd3p1,resultp1);
  958. }
  959. }
  960. /*
  961. * Now let's check for trapped underflow case.
  962. */
  963. else if (Dbl_iszero_exponent(opnd3p1) &&
  964. Is_underflowtrap_enabled()) {
  965. /* need to normalize results mantissa */
  966. sign_save = Dbl_signextendedsign(opnd3p1);
  967. result_exponent = 0;
  968. Dbl_leftshiftby1(opnd3p1,opnd3p2);
  969. Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
  970. Dbl_set_sign(opnd3p1,/*using*/sign_save);
  971. Dbl_setwrapped_exponent(opnd3p1,result_exponent,
  972. unfl);
  973. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  974. /* inexact = FALSE */
  975. return(OPC_2E_UNDERFLOWEXCEPTION);
  976. }
  977. Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
  978. return(NOEXCEPTION);
  979. }
  980. /* is denormalized; want to normalize */
  981. Dbl_clear_signexponent(opnd2p1);
  982. Dbl_leftshiftby1(opnd2p1,opnd2p2);
  983. Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
  984. }
  985. /* Multiply the first two source mantissas together */
  986. /*
  987. * The intermediate result will be kept in tmpres,
  988. * which needs enough room for 106 bits of mantissa,
  989. * so lets call it a Double extended.
  990. */
  991. Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
  992. /*
  993. * Four bits at a time are inspected in each loop, and a
  994. * simple shift and add multiply algorithm is used.
  995. */
  996. for (count = DBL_P-1; count >= 0; count -= 4) {
  997. Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
  998. if (Dbit28p2(opnd1p2)) {
  999. /* Fourword_add should be an ADD followed by 3 ADDC's */
  1000. Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
  1001. opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
  1002. }
  1003. if (Dbit29p2(opnd1p2)) {
  1004. Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
  1005. opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
  1006. }
  1007. if (Dbit30p2(opnd1p2)) {
  1008. Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
  1009. opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
  1010. }
  1011. if (Dbit31p2(opnd1p2)) {
  1012. Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
  1013. opnd2p1, opnd2p2, 0, 0);
  1014. }
  1015. Dbl_rightshiftby4(opnd1p1,opnd1p2);
  1016. }
  1017. if (Is_dexthiddenoverflow(tmpresp1)) {
  1018. /* result mantissa >= 2 (mantissa overflow) */
  1019. mpy_exponent++;
  1020. Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
  1021. }
  1022. /*
  1023. * Restore the sign of the mpy result which was saved in resultp1.
  1024. * The exponent will continue to be kept in mpy_exponent.
  1025. */
  1026. Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
  1027. /*
  1028. * No rounding is required, since the result of the multiply
  1029. * is exact in the extended format.
  1030. */
  1031. /*
  1032. * Now we are ready to perform the add portion of the operation.
  1033. *
  1034. * The exponents need to be kept as integers for now, since the
  1035. * multiply result might not fit into the exponent field. We
  1036. * can't overflow or underflow because of this yet, since the
  1037. * add could bring the final result back into range.
  1038. */
  1039. add_exponent = Dbl_exponent(opnd3p1);
  1040. /*
  1041. * Check for denormalized or zero add operand.
  1042. */
  1043. if (add_exponent == 0) {
  1044. /* check for zero */
  1045. if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
  1046. /* right is zero */
  1047. /* Left can't be zero and must be result.
  1048. *
  1049. * The final result is now in tmpres and mpy_exponent,
  1050. * and needs to be rounded and squeezed back into
  1051. * double precision format from double extended.
  1052. */
  1053. result_exponent = mpy_exponent;
  1054. Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
  1055. resultp1,resultp2,resultp3,resultp4);
  1056. sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
  1057. goto round;
  1058. }
  1059. /*
  1060. * Neither are zeroes.
  1061. * Adjust exponent and normalize add operand.
  1062. */
  1063. sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
  1064. Dbl_clear_signexponent(opnd3p1);
  1065. Dbl_leftshiftby1(opnd3p1,opnd3p2);
  1066. Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
  1067. Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
  1068. } else {
  1069. Dbl_clear_exponent_set_hidden(opnd3p1);
  1070. }
  1071. /*
  1072. * Copy opnd3 to the double extended variable called right.
  1073. */
  1074. Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
  1075. /*
  1076. * A zero "save" helps discover equal operands (for later),
  1077. * and is used in swapping operands (if needed).
  1078. */
  1079. Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
  1080. /*
  1081. * Compare magnitude of operands.
  1082. */
  1083. Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
  1084. Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
  1085. if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
  1086. Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
  1087. /*
  1088. * Set the left operand to the larger one by XOR swap.
  1089. * First finish the first word "save".
  1090. */
  1091. Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
  1092. Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
  1093. Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
  1094. rightp2,rightp3,rightp4);
  1095. /* also setup exponents used in rest of routine */
  1096. diff_exponent = add_exponent - mpy_exponent;
  1097. result_exponent = add_exponent;
  1098. } else {
  1099. /* also setup exponents used in rest of routine */
  1100. diff_exponent = mpy_exponent - add_exponent;
  1101. result_exponent = mpy_exponent;
  1102. }
  1103. /* Invariant: left is not smaller than right. */
  1104. /*
  1105. * Special case alignment of operands that would force alignment
  1106. * beyond the extent of the extension. A further optimization
  1107. * could special case this but only reduces the path length for
  1108. * this infrequent case.
  1109. */
  1110. if (diff_exponent > DBLEXT_THRESHOLD) {
  1111. diff_exponent = DBLEXT_THRESHOLD;
  1112. }
  1113. /* Align right operand by shifting it to the right */
  1114. Dblext_clear_sign(rightp1);
  1115. Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
  1116. /*shifted by*/diff_exponent);
  1117. /* Treat sum and difference of the operands separately. */
  1118. if ((int)save < 0) {
  1119. /*
  1120. * Difference of the two operands. Overflow can occur if the
  1121. * multiply overflowed. A borrow can occur out of the hidden
  1122. * bit and force a post normalization phase.
  1123. */
  1124. Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
  1125. rightp1,rightp2,rightp3,rightp4,
  1126. resultp1,resultp2,resultp3,resultp4);
  1127. sign_save = Dbl_signextendedsign(resultp1);
  1128. if (Dbl_iszero_hidden(resultp1)) {
  1129. /* Handle normalization */
  1130. /* A straightforward algorithm would now shift the
  1131. * result and extension left until the hidden bit
  1132. * becomes one. Not all of the extension bits need
  1133. * participate in the shift. Only the two most
  1134. * significant bits (round and guard) are needed.
  1135. * If only a single shift is needed then the guard
  1136. * bit becomes a significant low order bit and the
  1137. * extension must participate in the rounding.
  1138. * If more than a single shift is needed, then all
  1139. * bits to the right of the guard bit are zeros,
  1140. * and the guard bit may or may not be zero. */
  1141. Dblext_leftshiftby1(resultp1,resultp2,resultp3,
  1142. resultp4);
  1143. /* Need to check for a zero result. The sign and
  1144. * exponent fields have already been zeroed. The more
  1145. * efficient test of the full object can be used.
  1146. */
  1147. if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
  1148. /* Must have been "x-x" or "x+(-x)". */
  1149. if (Is_rounding_mode(ROUNDMINUS))
  1150. Dbl_setone_sign(resultp1);
  1151. Dbl_copytoptr(resultp1,resultp2,dstptr);
  1152. return(NOEXCEPTION);
  1153. }
  1154. result_exponent--;
  1155. /* Look to see if normalization is finished. */
  1156. if (Dbl_isone_hidden(resultp1)) {
  1157. /* No further normalization is needed */
  1158. goto round;
  1159. }
  1160. /* Discover first one bit to determine shift amount.
  1161. * Use a modified binary search. We have already
  1162. * shifted the result one position right and still
  1163. * not found a one so the remainder of the extension
  1164. * must be zero and simplifies rounding. */
  1165. /* Scan bytes */
  1166. while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
  1167. Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
  1168. result_exponent -= 8;
  1169. }
  1170. /* Now narrow it down to the nibble */
  1171. if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
  1172. /* The lower nibble contains the
  1173. * normalizing one */
  1174. Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
  1175. result_exponent -= 4;
  1176. }
  1177. /* Select case where first bit is set (already
  1178. * normalized) otherwise select the proper shift. */
  1179. jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
  1180. if (jumpsize <= 7) switch(jumpsize) {
  1181. case 1:
  1182. Dblext_leftshiftby3(resultp1,resultp2,resultp3,
  1183. resultp4);
  1184. result_exponent -= 3;
  1185. break;
  1186. case 2:
  1187. case 3:
  1188. Dblext_leftshiftby2(resultp1,resultp2,resultp3,
  1189. resultp4);
  1190. result_exponent -= 2;
  1191. break;
  1192. case 4:
  1193. case 5:
  1194. case 6:
  1195. case 7:
  1196. Dblext_leftshiftby1(resultp1,resultp2,resultp3,
  1197. resultp4);
  1198. result_exponent -= 1;
  1199. break;
  1200. }
  1201. } /* end if (hidden...)... */
  1202. /* Fall through and round */
  1203. } /* end if (save < 0)... */
  1204. else {
  1205. /* Add magnitudes */
  1206. Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
  1207. rightp1,rightp2,rightp3,rightp4,
  1208. /*to*/resultp1,resultp2,resultp3,resultp4);
  1209. sign_save = Dbl_signextendedsign(resultp1);
  1210. if (Dbl_isone_hiddenoverflow(resultp1)) {
  1211. /* Prenormalization required. */
  1212. Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
  1213. resultp4);
  1214. result_exponent++;
  1215. } /* end if hiddenoverflow... */
  1216. } /* end else ...add magnitudes... */
  1217. /* Round the result. If the extension and lower two words are
  1218. * all zeros, then the result is exact. Otherwise round in the
  1219. * correct direction. Underflow is possible. If a postnormalization
  1220. * is necessary, then the mantissa is all zeros so no shift is needed.
  1221. */
  1222. round:
  1223. if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
  1224. Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
  1225. result_exponent,is_tiny);
  1226. }
  1227. Dbl_set_sign(resultp1,/*using*/sign_save);
  1228. if (Dblext_isnotzero_mantissap3(resultp3) ||
  1229. Dblext_isnotzero_mantissap4(resultp4)) {
  1230. inexact = TRUE;
  1231. switch(Rounding_mode()) {
  1232. case ROUNDNEAREST: /* The default. */
  1233. if (Dblext_isone_highp3(resultp3)) {
  1234. /* at least 1/2 ulp */
  1235. if (Dblext_isnotzero_low31p3(resultp3) ||
  1236. Dblext_isnotzero_mantissap4(resultp4) ||
  1237. Dblext_isone_lowp2(resultp2)) {
  1238. /* either exactly half way and odd or
  1239. * more than 1/2ulp */
  1240. Dbl_increment(resultp1,resultp2);
  1241. }
  1242. }
  1243. break;
  1244. case ROUNDPLUS:
  1245. if (Dbl_iszero_sign(resultp1)) {
  1246. /* Round up positive results */
  1247. Dbl_increment(resultp1,resultp2);
  1248. }
  1249. break;
  1250. case ROUNDMINUS:
  1251. if (Dbl_isone_sign(resultp1)) {
  1252. /* Round down negative results */
  1253. Dbl_increment(resultp1,resultp2);
  1254. }
  1255. case ROUNDZERO:;
  1256. /* truncate is simple */
  1257. } /* end switch... */
  1258. if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
  1259. }
  1260. if (result_exponent >= DBL_INFINITY_EXPONENT) {
  1261. /* Overflow */
  1262. if (Is_overflowtrap_enabled()) {
  1263. /*
  1264. * Adjust bias of result
  1265. */
  1266. Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
  1267. Dbl_copytoptr(resultp1,resultp2,dstptr);
  1268. if (inexact)
  1269. if (Is_inexacttrap_enabled())
  1270. return (OPC_2E_OVERFLOWEXCEPTION |
  1271. OPC_2E_INEXACTEXCEPTION);
  1272. else Set_inexactflag();
  1273. return (OPC_2E_OVERFLOWEXCEPTION);
  1274. }
  1275. inexact = TRUE;
  1276. Set_overflowflag();
  1277. Dbl_setoverflow(resultp1,resultp2);
  1278. } else if (result_exponent <= 0) { /* underflow case */
  1279. if (Is_underflowtrap_enabled()) {
  1280. /*
  1281. * Adjust bias of result
  1282. */
  1283. Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
  1284. Dbl_copytoptr(resultp1,resultp2,dstptr);
  1285. if (inexact)
  1286. if (Is_inexacttrap_enabled())
  1287. return (OPC_2E_UNDERFLOWEXCEPTION |
  1288. OPC_2E_INEXACTEXCEPTION);
  1289. else Set_inexactflag();
  1290. return(OPC_2E_UNDERFLOWEXCEPTION);
  1291. }
  1292. else if (inexact && is_tiny) Set_underflowflag();
  1293. }
  1294. else Dbl_set_exponent(resultp1,result_exponent);
  1295. Dbl_copytoptr(resultp1,resultp2,dstptr);
  1296. if (inexact)
  1297. if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
  1298. else Set_inexactflag();
  1299. return(NOEXCEPTION);
  1300. }
  1301. /*
  1302. * Single Floating-point Multiply Fused Add
  1303. */
  1304. sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  1305. sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
  1306. unsigned int *status;
  1307. {
  1308. unsigned int opnd1, opnd2, opnd3;
  1309. register unsigned int tmpresp1, tmpresp2;
  1310. unsigned int rightp1, rightp2;
  1311. unsigned int resultp1, resultp2 = 0;
  1312. register int mpy_exponent, add_exponent, count;
  1313. boolean inexact = FALSE, is_tiny = FALSE;
  1314. unsigned int signlessleft1, signlessright1, save;
  1315. register int result_exponent, diff_exponent;
  1316. int sign_save, jumpsize;
  1317. Sgl_copyfromptr(src1ptr,opnd1);
  1318. Sgl_copyfromptr(src2ptr,opnd2);
  1319. Sgl_copyfromptr(src3ptr,opnd3);
  1320. /*
  1321. * set sign bit of result of multiply
  1322. */
  1323. if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
  1324. Sgl_setnegativezero(resultp1);
  1325. else Sgl_setzero(resultp1);
  1326. /*
  1327. * Generate multiply exponent
  1328. */
  1329. mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
  1330. /*
  1331. * check first operand for NaN's or infinity
  1332. */
  1333. if (Sgl_isinfinity_exponent(opnd1)) {
  1334. if (Sgl_iszero_mantissa(opnd1)) {
  1335. if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
  1336. if (Sgl_iszero_exponentmantissa(opnd2)) {
  1337. /*
  1338. * invalid since operands are infinity
  1339. * and zero
  1340. */
  1341. if (Is_invalidtrap_enabled())
  1342. return(OPC_2E_INVALIDEXCEPTION);
  1343. Set_invalidflag();
  1344. Sgl_makequietnan(resultp1);
  1345. Sgl_copytoptr(resultp1,dstptr);
  1346. return(NOEXCEPTION);
  1347. }
  1348. /*
  1349. * Check third operand for infinity with a
  1350. * sign opposite of the multiply result
  1351. */
  1352. if (Sgl_isinfinity(opnd3) &&
  1353. (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
  1354. /*
  1355. * invalid since attempting a magnitude
  1356. * subtraction of infinities
  1357. */
  1358. if (Is_invalidtrap_enabled())
  1359. return(OPC_2E_INVALIDEXCEPTION);
  1360. Set_invalidflag();
  1361. Sgl_makequietnan(resultp1);
  1362. Sgl_copytoptr(resultp1,dstptr);
  1363. return(NOEXCEPTION);
  1364. }
  1365. /*
  1366. * return infinity
  1367. */
  1368. Sgl_setinfinity_exponentmantissa(resultp1);
  1369. Sgl_copytoptr(resultp1,dstptr);
  1370. return(NOEXCEPTION);
  1371. }
  1372. }
  1373. else {
  1374. /*
  1375. * is NaN; signaling or quiet?
  1376. */
  1377. if (Sgl_isone_signaling(opnd1)) {
  1378. /* trap if INVALIDTRAP enabled */
  1379. if (Is_invalidtrap_enabled())
  1380. return(OPC_2E_INVALIDEXCEPTION);
  1381. /* make NaN quiet */
  1382. Set_invalidflag();
  1383. Sgl_set_quiet(opnd1);
  1384. }
  1385. /*
  1386. * is second operand a signaling NaN?
  1387. */
  1388. else if (Sgl_is_signalingnan(opnd2)) {
  1389. /* trap if INVALIDTRAP enabled */
  1390. if (Is_invalidtrap_enabled())
  1391. return(OPC_2E_INVALIDEXCEPTION);
  1392. /* make NaN quiet */
  1393. Set_invalidflag();
  1394. Sgl_set_quiet(opnd2);
  1395. Sgl_copytoptr(opnd2,dstptr);
  1396. return(NOEXCEPTION);
  1397. }
  1398. /*
  1399. * is third operand a signaling NaN?
  1400. */
  1401. else if (Sgl_is_signalingnan(opnd3)) {
  1402. /* trap if INVALIDTRAP enabled */
  1403. if (Is_invalidtrap_enabled())
  1404. return(OPC_2E_INVALIDEXCEPTION);
  1405. /* make NaN quiet */
  1406. Set_invalidflag();
  1407. Sgl_set_quiet(opnd3);
  1408. Sgl_copytoptr(opnd3,dstptr);
  1409. return(NOEXCEPTION);
  1410. }
  1411. /*
  1412. * return quiet NaN
  1413. */
  1414. Sgl_copytoptr(opnd1,dstptr);
  1415. return(NOEXCEPTION);
  1416. }
  1417. }
  1418. /*
  1419. * check second operand for NaN's or infinity
  1420. */
  1421. if (Sgl_isinfinity_exponent(opnd2)) {
  1422. if (Sgl_iszero_mantissa(opnd2)) {
  1423. if (Sgl_isnotnan(opnd3)) {
  1424. if (Sgl_iszero_exponentmantissa(opnd1)) {
  1425. /*
  1426. * invalid since multiply operands are
  1427. * zero & infinity
  1428. */
  1429. if (Is_invalidtrap_enabled())
  1430. return(OPC_2E_INVALIDEXCEPTION);
  1431. Set_invalidflag();
  1432. Sgl_makequietnan(opnd2);
  1433. Sgl_copytoptr(opnd2,dstptr);
  1434. return(NOEXCEPTION);
  1435. }
  1436. /*
  1437. * Check third operand for infinity with a
  1438. * sign opposite of the multiply result
  1439. */
  1440. if (Sgl_isinfinity(opnd3) &&
  1441. (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
  1442. /*
  1443. * invalid since attempting a magnitude
  1444. * subtraction of infinities
  1445. */
  1446. if (Is_invalidtrap_enabled())
  1447. return(OPC_2E_INVALIDEXCEPTION);
  1448. Set_invalidflag();
  1449. Sgl_makequietnan(resultp1);
  1450. Sgl_copytoptr(resultp1,dstptr);
  1451. return(NOEXCEPTION);
  1452. }
  1453. /*
  1454. * return infinity
  1455. */
  1456. Sgl_setinfinity_exponentmantissa(resultp1);
  1457. Sgl_copytoptr(resultp1,dstptr);
  1458. return(NOEXCEPTION);
  1459. }
  1460. }
  1461. else {
  1462. /*
  1463. * is NaN; signaling or quiet?
  1464. */
  1465. if (Sgl_isone_signaling(opnd2)) {
  1466. /* trap if INVALIDTRAP enabled */
  1467. if (Is_invalidtrap_enabled())
  1468. return(OPC_2E_INVALIDEXCEPTION);
  1469. /* make NaN quiet */
  1470. Set_invalidflag();
  1471. Sgl_set_quiet(opnd2);
  1472. }
  1473. /*
  1474. * is third operand a signaling NaN?
  1475. */
  1476. else if (Sgl_is_signalingnan(opnd3)) {
  1477. /* trap if INVALIDTRAP enabled */
  1478. if (Is_invalidtrap_enabled())
  1479. return(OPC_2E_INVALIDEXCEPTION);
  1480. /* make NaN quiet */
  1481. Set_invalidflag();
  1482. Sgl_set_quiet(opnd3);
  1483. Sgl_copytoptr(opnd3,dstptr);
  1484. return(NOEXCEPTION);
  1485. }
  1486. /*
  1487. * return quiet NaN
  1488. */
  1489. Sgl_copytoptr(opnd2,dstptr);
  1490. return(NOEXCEPTION);
  1491. }
  1492. }
  1493. /*
  1494. * check third operand for NaN's or infinity
  1495. */
  1496. if (Sgl_isinfinity_exponent(opnd3)) {
  1497. if (Sgl_iszero_mantissa(opnd3)) {
  1498. /* return infinity */
  1499. Sgl_copytoptr(opnd3,dstptr);
  1500. return(NOEXCEPTION);
  1501. } else {
  1502. /*
  1503. * is NaN; signaling or quiet?
  1504. */
  1505. if (Sgl_isone_signaling(opnd3)) {
  1506. /* trap if INVALIDTRAP enabled */
  1507. if (Is_invalidtrap_enabled())
  1508. return(OPC_2E_INVALIDEXCEPTION);
  1509. /* make NaN quiet */
  1510. Set_invalidflag();
  1511. Sgl_set_quiet(opnd3);
  1512. }
  1513. /*
  1514. * return quiet NaN
  1515. */
  1516. Sgl_copytoptr(opnd3,dstptr);
  1517. return(NOEXCEPTION);
  1518. }
  1519. }
  1520. /*
  1521. * Generate multiply mantissa
  1522. */
  1523. if (Sgl_isnotzero_exponent(opnd1)) {
  1524. /* set hidden bit */
  1525. Sgl_clear_signexponent_set_hidden(opnd1);
  1526. }
  1527. else {
  1528. /* check for zero */
  1529. if (Sgl_iszero_mantissa(opnd1)) {
  1530. /*
  1531. * Perform the add opnd3 with zero here.
  1532. */
  1533. if (Sgl_iszero_exponentmantissa(opnd3)) {
  1534. if (Is_rounding_mode(ROUNDMINUS)) {
  1535. Sgl_or_signs(opnd3,resultp1);
  1536. } else {
  1537. Sgl_and_signs(opnd3,resultp1);
  1538. }
  1539. }
  1540. /*
  1541. * Now let's check for trapped underflow case.
  1542. */
  1543. else if (Sgl_iszero_exponent(opnd3) &&
  1544. Is_underflowtrap_enabled()) {
  1545. /* need to normalize results mantissa */
  1546. sign_save = Sgl_signextendedsign(opnd3);
  1547. result_exponent = 0;
  1548. Sgl_leftshiftby1(opnd3);
  1549. Sgl_normalize(opnd3,result_exponent);
  1550. Sgl_set_sign(opnd3,/*using*/sign_save);
  1551. Sgl_setwrapped_exponent(opnd3,result_exponent,
  1552. unfl);
  1553. Sgl_copytoptr(opnd3,dstptr);
  1554. /* inexact = FALSE */
  1555. return(OPC_2E_UNDERFLOWEXCEPTION);
  1556. }
  1557. Sgl_copytoptr(opnd3,dstptr);
  1558. return(NOEXCEPTION);
  1559. }
  1560. /* is denormalized, adjust exponent */
  1561. Sgl_clear_signexponent(opnd1);
  1562. Sgl_leftshiftby1(opnd1);
  1563. Sgl_normalize(opnd1,mpy_exponent);
  1564. }
  1565. /* opnd2 needs to have hidden bit set with msb in hidden bit */
  1566. if (Sgl_isnotzero_exponent(opnd2)) {
  1567. Sgl_clear_signexponent_set_hidden(opnd2);
  1568. }
  1569. else {
  1570. /* check for zero */
  1571. if (Sgl_iszero_mantissa(opnd2)) {
  1572. /*
  1573. * Perform the add opnd3 with zero here.
  1574. */
  1575. if (Sgl_iszero_exponentmantissa(opnd3)) {
  1576. if (Is_rounding_mode(ROUNDMINUS)) {
  1577. Sgl_or_signs(opnd3,resultp1);
  1578. } else {
  1579. Sgl_and_signs(opnd3,resultp1);
  1580. }
  1581. }
  1582. /*
  1583. * Now let's check for trapped underflow case.
  1584. */
  1585. else if (Sgl_iszero_exponent(opnd3) &&
  1586. Is_underflowtrap_enabled()) {
  1587. /* need to normalize results mantissa */
  1588. sign_save = Sgl_signextendedsign(opnd3);
  1589. result_exponent = 0;
  1590. Sgl_leftshiftby1(opnd3);
  1591. Sgl_normalize(opnd3,result_exponent);
  1592. Sgl_set_sign(opnd3,/*using*/sign_save);
  1593. Sgl_setwrapped_exponent(opnd3,result_exponent,
  1594. unfl);
  1595. Sgl_copytoptr(opnd3,dstptr);
  1596. /* inexact = FALSE */
  1597. return(OPC_2E_UNDERFLOWEXCEPTION);
  1598. }
  1599. Sgl_copytoptr(opnd3,dstptr);
  1600. return(NOEXCEPTION);
  1601. }
  1602. /* is denormalized; want to normalize */
  1603. Sgl_clear_signexponent(opnd2);
  1604. Sgl_leftshiftby1(opnd2);
  1605. Sgl_normalize(opnd2,mpy_exponent);
  1606. }
  1607. /* Multiply the first two source mantissas together */
  1608. /*
  1609. * The intermediate result will be kept in tmpres,
  1610. * which needs enough room for 106 bits of mantissa,
  1611. * so lets call it a Double extended.
  1612. */
  1613. Sglext_setzero(tmpresp1,tmpresp2);
  1614. /*
  1615. * Four bits at a time are inspected in each loop, and a
  1616. * simple shift and add multiply algorithm is used.
  1617. */
  1618. for (count = SGL_P-1; count >= 0; count -= 4) {
  1619. Sglext_rightshiftby4(tmpresp1,tmpresp2);
  1620. if (Sbit28(opnd1)) {
  1621. /* Twoword_add should be an ADD followed by 2 ADDC's */
  1622. Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
  1623. }
  1624. if (Sbit29(opnd1)) {
  1625. Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
  1626. }
  1627. if (Sbit30(opnd1)) {
  1628. Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
  1629. }
  1630. if (Sbit31(opnd1)) {
  1631. Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
  1632. }
  1633. Sgl_rightshiftby4(opnd1);
  1634. }
  1635. if (Is_sexthiddenoverflow(tmpresp1)) {
  1636. /* result mantissa >= 2 (mantissa overflow) */
  1637. mpy_exponent++;
  1638. Sglext_rightshiftby4(tmpresp1,tmpresp2);
  1639. } else {
  1640. Sglext_rightshiftby3(tmpresp1,tmpresp2);
  1641. }
  1642. /*
  1643. * Restore the sign of the mpy result which was saved in resultp1.
  1644. * The exponent will continue to be kept in mpy_exponent.
  1645. */
  1646. Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
  1647. /*
  1648. * No rounding is required, since the result of the multiply
  1649. * is exact in the extended format.
  1650. */
  1651. /*
  1652. * Now we are ready to perform the add portion of the operation.
  1653. *
  1654. * The exponents need to be kept as integers for now, since the
  1655. * multiply result might not fit into the exponent field. We
  1656. * can't overflow or underflow because of this yet, since the
  1657. * add could bring the final result back into range.
  1658. */
  1659. add_exponent = Sgl_exponent(opnd3);
  1660. /*
  1661. * Check for denormalized or zero add operand.
  1662. */
  1663. if (add_exponent == 0) {
  1664. /* check for zero */
  1665. if (Sgl_iszero_mantissa(opnd3)) {
  1666. /* right is zero */
  1667. /* Left can't be zero and must be result.
  1668. *
  1669. * The final result is now in tmpres and mpy_exponent,
  1670. * and needs to be rounded and squeezed back into
  1671. * double precision format from double extended.
  1672. */
  1673. result_exponent = mpy_exponent;
  1674. Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
  1675. sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
  1676. goto round;
  1677. }
  1678. /*
  1679. * Neither are zeroes.
  1680. * Adjust exponent and normalize add operand.
  1681. */
  1682. sign_save = Sgl_signextendedsign(opnd3); /* save sign */
  1683. Sgl_clear_signexponent(opnd3);
  1684. Sgl_leftshiftby1(opnd3);
  1685. Sgl_normalize(opnd3,add_exponent);
  1686. Sgl_set_sign(opnd3,sign_save); /* restore sign */
  1687. } else {
  1688. Sgl_clear_exponent_set_hidden(opnd3);
  1689. }
  1690. /*
  1691. * Copy opnd3 to the double extended variable called right.
  1692. */
  1693. Sgl_copyto_sglext(opnd3,rightp1,rightp2);
  1694. /*
  1695. * A zero "save" helps discover equal operands (for later),
  1696. * and is used in swapping operands (if needed).
  1697. */
  1698. Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
  1699. /*
  1700. * Compare magnitude of operands.
  1701. */
  1702. Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
  1703. Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
  1704. if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
  1705. Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
  1706. /*
  1707. * Set the left operand to the larger one by XOR swap.
  1708. * First finish the first word "save".
  1709. */
  1710. Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
  1711. Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
  1712. Sglext_swap_lower(tmpresp2,rightp2);
  1713. /* also setup exponents used in rest of routine */
  1714. diff_exponent = add_exponent - mpy_exponent;
  1715. result_exponent = add_exponent;
  1716. } else {
  1717. /* also setup exponents used in rest of routine */
  1718. diff_exponent = mpy_exponent - add_exponent;
  1719. result_exponent = mpy_exponent;
  1720. }
  1721. /* Invariant: left is not smaller than right. */
  1722. /*
  1723. * Special case alignment of operands that would force alignment
  1724. * beyond the extent of the extension. A further optimization
  1725. * could special case this but only reduces the path length for
  1726. * this infrequent case.
  1727. */
  1728. if (diff_exponent > SGLEXT_THRESHOLD) {
  1729. diff_exponent = SGLEXT_THRESHOLD;
  1730. }
  1731. /* Align right operand by shifting it to the right */
  1732. Sglext_clear_sign(rightp1);
  1733. Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
  1734. /* Treat sum and difference of the operands separately. */
  1735. if ((int)save < 0) {
  1736. /*
  1737. * Difference of the two operands. Overflow can occur if the
  1738. * multiply overflowed. A borrow can occur out of the hidden
  1739. * bit and force a post normalization phase.
  1740. */
  1741. Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
  1742. resultp1,resultp2);
  1743. sign_save = Sgl_signextendedsign(resultp1);
  1744. if (Sgl_iszero_hidden(resultp1)) {
  1745. /* Handle normalization */
  1746. /* A straightforward algorithm would now shift the
  1747. * result and extension left until the hidden bit
  1748. * becomes one. Not all of the extension bits need
  1749. * participate in the shift. Only the two most
  1750. * significant bits (round and guard) are needed.
  1751. * If only a single shift is needed then the guard
  1752. * bit becomes a significant low order bit and the
  1753. * extension must participate in the rounding.
  1754. * If more than a single shift is needed, then all
  1755. * bits to the right of the guard bit are zeros,
  1756. * and the guard bit may or may not be zero. */
  1757. Sglext_leftshiftby1(resultp1,resultp2);
  1758. /* Need to check for a zero result. The sign and
  1759. * exponent fields have already been zeroed. The more
  1760. * efficient test of the full object can be used.
  1761. */
  1762. if (Sglext_iszero(resultp1,resultp2)) {
  1763. /* Must have been "x-x" or "x+(-x)". */
  1764. if (Is_rounding_mode(ROUNDMINUS))
  1765. Sgl_setone_sign(resultp1);
  1766. Sgl_copytoptr(resultp1,dstptr);
  1767. return(NOEXCEPTION);
  1768. }
  1769. result_exponent--;
  1770. /* Look to see if normalization is finished. */
  1771. if (Sgl_isone_hidden(resultp1)) {
  1772. /* No further normalization is needed */
  1773. goto round;
  1774. }
  1775. /* Discover first one bit to determine shift amount.
  1776. * Use a modified binary search. We have already
  1777. * shifted the result one position right and still
  1778. * not found a one so the remainder of the extension
  1779. * must be zero and simplifies rounding. */
  1780. /* Scan bytes */
  1781. while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
  1782. Sglext_leftshiftby8(resultp1,resultp2);
  1783. result_exponent -= 8;
  1784. }
  1785. /* Now narrow it down to the nibble */
  1786. if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
  1787. /* The lower nibble contains the
  1788. * normalizing one */
  1789. Sglext_leftshiftby4(resultp1,resultp2);
  1790. result_exponent -= 4;
  1791. }
  1792. /* Select case where first bit is set (already
  1793. * normalized) otherwise select the proper shift. */
  1794. jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
  1795. if (jumpsize <= 7) switch(jumpsize) {
  1796. case 1:
  1797. Sglext_leftshiftby3(resultp1,resultp2);
  1798. result_exponent -= 3;
  1799. break;
  1800. case 2:
  1801. case 3:
  1802. Sglext_leftshiftby2(resultp1,resultp2);
  1803. result_exponent -= 2;
  1804. break;
  1805. case 4:
  1806. case 5:
  1807. case 6:
  1808. case 7:
  1809. Sglext_leftshiftby1(resultp1,resultp2);
  1810. result_exponent -= 1;
  1811. break;
  1812. }
  1813. } /* end if (hidden...)... */
  1814. /* Fall through and round */
  1815. } /* end if (save < 0)... */
  1816. else {
  1817. /* Add magnitudes */
  1818. Sglext_addition(tmpresp1,tmpresp2,
  1819. rightp1,rightp2, /*to*/resultp1,resultp2);
  1820. sign_save = Sgl_signextendedsign(resultp1);
  1821. if (Sgl_isone_hiddenoverflow(resultp1)) {
  1822. /* Prenormalization required. */
  1823. Sglext_arithrightshiftby1(resultp1,resultp2);
  1824. result_exponent++;
  1825. } /* end if hiddenoverflow... */
  1826. } /* end else ...add magnitudes... */
  1827. /* Round the result. If the extension and lower two words are
  1828. * all zeros, then the result is exact. Otherwise round in the
  1829. * correct direction. Underflow is possible. If a postnormalization
  1830. * is necessary, then the mantissa is all zeros so no shift is needed.
  1831. */
  1832. round:
  1833. if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
  1834. Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
  1835. }
  1836. Sgl_set_sign(resultp1,/*using*/sign_save);
  1837. if (Sglext_isnotzero_mantissap2(resultp2)) {
  1838. inexact = TRUE;
  1839. switch(Rounding_mode()) {
  1840. case ROUNDNEAREST: /* The default. */
  1841. if (Sglext_isone_highp2(resultp2)) {
  1842. /* at least 1/2 ulp */
  1843. if (Sglext_isnotzero_low31p2(resultp2) ||
  1844. Sglext_isone_lowp1(resultp1)) {
  1845. /* either exactly half way and odd or
  1846. * more than 1/2ulp */
  1847. Sgl_increment(resultp1);
  1848. }
  1849. }
  1850. break;
  1851. case ROUNDPLUS:
  1852. if (Sgl_iszero_sign(resultp1)) {
  1853. /* Round up positive results */
  1854. Sgl_increment(resultp1);
  1855. }
  1856. break;
  1857. case ROUNDMINUS:
  1858. if (Sgl_isone_sign(resultp1)) {
  1859. /* Round down negative results */
  1860. Sgl_increment(resultp1);
  1861. }
  1862. case ROUNDZERO:;
  1863. /* truncate is simple */
  1864. } /* end switch... */
  1865. if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
  1866. }
  1867. if (result_exponent >= SGL_INFINITY_EXPONENT) {
  1868. /* Overflow */
  1869. if (Is_overflowtrap_enabled()) {
  1870. /*
  1871. * Adjust bias of result
  1872. */
  1873. Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
  1874. Sgl_copytoptr(resultp1,dstptr);
  1875. if (inexact)
  1876. if (Is_inexacttrap_enabled())
  1877. return (OPC_2E_OVERFLOWEXCEPTION |
  1878. OPC_2E_INEXACTEXCEPTION);
  1879. else Set_inexactflag();
  1880. return (OPC_2E_OVERFLOWEXCEPTION);
  1881. }
  1882. inexact = TRUE;
  1883. Set_overflowflag();
  1884. Sgl_setoverflow(resultp1);
  1885. } else if (result_exponent <= 0) { /* underflow case */
  1886. if (Is_underflowtrap_enabled()) {
  1887. /*
  1888. * Adjust bias of result
  1889. */
  1890. Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
  1891. Sgl_copytoptr(resultp1,dstptr);
  1892. if (inexact)
  1893. if (Is_inexacttrap_enabled())
  1894. return (OPC_2E_UNDERFLOWEXCEPTION |
  1895. OPC_2E_INEXACTEXCEPTION);
  1896. else Set_inexactflag();
  1897. return(OPC_2E_UNDERFLOWEXCEPTION);
  1898. }
  1899. else if (inexact && is_tiny) Set_underflowflag();
  1900. }
  1901. else Sgl_set_exponent(resultp1,result_exponent);
  1902. Sgl_copytoptr(resultp1,dstptr);
  1903. if (inexact)
  1904. if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
  1905. else Set_inexactflag();
  1906. return(NOEXCEPTION);
  1907. }
  1908. /*
  1909. * Single Floating-point Multiply Negate Fused Add
  1910. */
  1911. sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
  1912. sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
  1913. unsigned int *status;
  1914. {
  1915. unsigned int opnd1, opnd2, opnd3;
  1916. register unsigned int tmpresp1, tmpresp2;
  1917. unsigned int rightp1, rightp2;
  1918. unsigned int resultp1, resultp2 = 0;
  1919. register int mpy_exponent, add_exponent, count;
  1920. boolean inexact = FALSE, is_tiny = FALSE;
  1921. unsigned int signlessleft1, signlessright1, save;
  1922. register int result_exponent, diff_exponent;
  1923. int sign_save, jumpsize;
  1924. Sgl_copyfromptr(src1ptr,opnd1);
  1925. Sgl_copyfromptr(src2ptr,opnd2);
  1926. Sgl_copyfromptr(src3ptr,opnd3);
  1927. /*
  1928. * set sign bit of result of multiply
  1929. */
  1930. if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
  1931. Sgl_setzero(resultp1);
  1932. else
  1933. Sgl_setnegativezero(resultp1);
  1934. /*
  1935. * Generate multiply exponent
  1936. */
  1937. mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
  1938. /*
  1939. * check first operand for NaN's or infinity
  1940. */
  1941. if (Sgl_isinfinity_exponent(opnd1)) {
  1942. if (Sgl_iszero_mantissa(opnd1)) {
  1943. if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
  1944. if (Sgl_iszero_exponentmantissa(opnd2)) {
  1945. /*
  1946. * invalid since operands are infinity
  1947. * and zero
  1948. */
  1949. if (Is_invalidtrap_enabled())
  1950. return(OPC_2E_INVALIDEXCEPTION);
  1951. Set_invalidflag();
  1952. Sgl_makequietnan(resultp1);
  1953. Sgl_copytoptr(resultp1,dstptr);
  1954. return(NOEXCEPTION);
  1955. }
  1956. /*
  1957. * Check third operand for infinity with a
  1958. * sign opposite of the multiply result
  1959. */
  1960. if (Sgl_isinfinity(opnd3) &&
  1961. (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
  1962. /*
  1963. * invalid since attempting a magnitude
  1964. * subtraction of infinities
  1965. */
  1966. if (Is_invalidtrap_enabled())
  1967. return(OPC_2E_INVALIDEXCEPTION);
  1968. Set_invalidflag();
  1969. Sgl_makequietnan(resultp1);
  1970. Sgl_copytoptr(resultp1,dstptr);
  1971. return(NOEXCEPTION);
  1972. }
  1973. /*
  1974. * return infinity
  1975. */
  1976. Sgl_setinfinity_exponentmantissa(resultp1);
  1977. Sgl_copytoptr(resultp1,dstptr);
  1978. return(NOEXCEPTION);
  1979. }
  1980. }
  1981. else {
  1982. /*
  1983. * is NaN; signaling or quiet?
  1984. */
  1985. if (Sgl_isone_signaling(opnd1)) {
  1986. /* trap if INVALIDTRAP enabled */
  1987. if (Is_invalidtrap_enabled())
  1988. return(OPC_2E_INVALIDEXCEPTION);
  1989. /* make NaN quiet */
  1990. Set_invalidflag();
  1991. Sgl_set_quiet(opnd1);
  1992. }
  1993. /*
  1994. * is second operand a signaling NaN?
  1995. */
  1996. else if (Sgl_is_signalingnan(opnd2)) {
  1997. /* trap if INVALIDTRAP enabled */
  1998. if (Is_invalidtrap_enabled())
  1999. return(OPC_2E_INVALIDEXCEPTION);
  2000. /* make NaN quiet */
  2001. Set_invalidflag();
  2002. Sgl_set_quiet(opnd2);
  2003. Sgl_copytoptr(opnd2,dstptr);
  2004. return(NOEXCEPTION);
  2005. }
  2006. /*
  2007. * is third operand a signaling NaN?
  2008. */
  2009. else if (Sgl_is_signalingnan(opnd3)) {
  2010. /* trap if INVALIDTRAP enabled */
  2011. if (Is_invalidtrap_enabled())
  2012. return(OPC_2E_INVALIDEXCEPTION);
  2013. /* make NaN quiet */
  2014. Set_invalidflag();
  2015. Sgl_set_quiet(opnd3);
  2016. Sgl_copytoptr(opnd3,dstptr);
  2017. return(NOEXCEPTION);
  2018. }
  2019. /*
  2020. * return quiet NaN
  2021. */
  2022. Sgl_copytoptr(opnd1,dstptr);
  2023. return(NOEXCEPTION);
  2024. }
  2025. }
  2026. /*
  2027. * check second operand for NaN's or infinity
  2028. */
  2029. if (Sgl_isinfinity_exponent(opnd2)) {
  2030. if (Sgl_iszero_mantissa(opnd2)) {
  2031. if (Sgl_isnotnan(opnd3)) {
  2032. if (Sgl_iszero_exponentmantissa(opnd1)) {
  2033. /*
  2034. * invalid since multiply operands are
  2035. * zero & infinity
  2036. */
  2037. if (Is_invalidtrap_enabled())
  2038. return(OPC_2E_INVALIDEXCEPTION);
  2039. Set_invalidflag();
  2040. Sgl_makequietnan(opnd2);
  2041. Sgl_copytoptr(opnd2,dstptr);
  2042. return(NOEXCEPTION);
  2043. }
  2044. /*
  2045. * Check third operand for infinity with a
  2046. * sign opposite of the multiply result
  2047. */
  2048. if (Sgl_isinfinity(opnd3) &&
  2049. (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
  2050. /*
  2051. * invalid since attempting a magnitude
  2052. * subtraction of infinities
  2053. */
  2054. if (Is_invalidtrap_enabled())
  2055. return(OPC_2E_INVALIDEXCEPTION);
  2056. Set_invalidflag();
  2057. Sgl_makequietnan(resultp1);
  2058. Sgl_copytoptr(resultp1,dstptr);
  2059. return(NOEXCEPTION);
  2060. }
  2061. /*
  2062. * return infinity
  2063. */
  2064. Sgl_setinfinity_exponentmantissa(resultp1);
  2065. Sgl_copytoptr(resultp1,dstptr);
  2066. return(NOEXCEPTION);
  2067. }
  2068. }
  2069. else {
  2070. /*
  2071. * is NaN; signaling or quiet?
  2072. */
  2073. if (Sgl_isone_signaling(opnd2)) {
  2074. /* trap if INVALIDTRAP enabled */
  2075. if (Is_invalidtrap_enabled())
  2076. return(OPC_2E_INVALIDEXCEPTION);
  2077. /* make NaN quiet */
  2078. Set_invalidflag();
  2079. Sgl_set_quiet(opnd2);
  2080. }
  2081. /*
  2082. * is third operand a signaling NaN?
  2083. */
  2084. else if (Sgl_is_signalingnan(opnd3)) {
  2085. /* trap if INVALIDTRAP enabled */
  2086. if (Is_invalidtrap_enabled())
  2087. return(OPC_2E_INVALIDEXCEPTION);
  2088. /* make NaN quiet */
  2089. Set_invalidflag();
  2090. Sgl_set_quiet(opnd3);
  2091. Sgl_copytoptr(opnd3,dstptr);
  2092. return(NOEXCEPTION);
  2093. }
  2094. /*
  2095. * return quiet NaN
  2096. */
  2097. Sgl_copytoptr(opnd2,dstptr);
  2098. return(NOEXCEPTION);
  2099. }
  2100. }
  2101. /*
  2102. * check third operand for NaN's or infinity
  2103. */
  2104. if (Sgl_isinfinity_exponent(opnd3)) {
  2105. if (Sgl_iszero_mantissa(opnd3)) {
  2106. /* return infinity */
  2107. Sgl_copytoptr(opnd3,dstptr);
  2108. return(NOEXCEPTION);
  2109. } else {
  2110. /*
  2111. * is NaN; signaling or quiet?
  2112. */
  2113. if (Sgl_isone_signaling(opnd3)) {
  2114. /* trap if INVALIDTRAP enabled */
  2115. if (Is_invalidtrap_enabled())
  2116. return(OPC_2E_INVALIDEXCEPTION);
  2117. /* make NaN quiet */
  2118. Set_invalidflag();
  2119. Sgl_set_quiet(opnd3);
  2120. }
  2121. /*
  2122. * return quiet NaN
  2123. */
  2124. Sgl_copytoptr(opnd3,dstptr);
  2125. return(NOEXCEPTION);
  2126. }
  2127. }
  2128. /*
  2129. * Generate multiply mantissa
  2130. */
  2131. if (Sgl_isnotzero_exponent(opnd1)) {
  2132. /* set hidden bit */
  2133. Sgl_clear_signexponent_set_hidden(opnd1);
  2134. }
  2135. else {
  2136. /* check for zero */
  2137. if (Sgl_iszero_mantissa(opnd1)) {
  2138. /*
  2139. * Perform the add opnd3 with zero here.
  2140. */
  2141. if (Sgl_iszero_exponentmantissa(opnd3)) {
  2142. if (Is_rounding_mode(ROUNDMINUS)) {
  2143. Sgl_or_signs(opnd3,resultp1);
  2144. } else {
  2145. Sgl_and_signs(opnd3,resultp1);
  2146. }
  2147. }
  2148. /*
  2149. * Now let's check for trapped underflow case.
  2150. */
  2151. else if (Sgl_iszero_exponent(opnd3) &&
  2152. Is_underflowtrap_enabled()) {
  2153. /* need to normalize results mantissa */
  2154. sign_save = Sgl_signextendedsign(opnd3);
  2155. result_exponent = 0;
  2156. Sgl_leftshiftby1(opnd3);
  2157. Sgl_normalize(opnd3,result_exponent);
  2158. Sgl_set_sign(opnd3,/*using*/sign_save);
  2159. Sgl_setwrapped_exponent(opnd3,result_exponent,
  2160. unfl);
  2161. Sgl_copytoptr(opnd3,dstptr);
  2162. /* inexact = FALSE */
  2163. return(OPC_2E_UNDERFLOWEXCEPTION);
  2164. }
  2165. Sgl_copytoptr(opnd3,dstptr);
  2166. return(NOEXCEPTION);
  2167. }
  2168. /* is denormalized, adjust exponent */
  2169. Sgl_clear_signexponent(opnd1);
  2170. Sgl_leftshiftby1(opnd1);
  2171. Sgl_normalize(opnd1,mpy_exponent);
  2172. }
  2173. /* opnd2 needs to have hidden bit set with msb in hidden bit */
  2174. if (Sgl_isnotzero_exponent(opnd2)) {
  2175. Sgl_clear_signexponent_set_hidden(opnd2);
  2176. }
  2177. else {
  2178. /* check for zero */
  2179. if (Sgl_iszero_mantissa(opnd2)) {
  2180. /*
  2181. * Perform the add opnd3 with zero here.
  2182. */
  2183. if (Sgl_iszero_exponentmantissa(opnd3)) {
  2184. if (Is_rounding_mode(ROUNDMINUS)) {
  2185. Sgl_or_signs(opnd3,resultp1);
  2186. } else {
  2187. Sgl_and_signs(opnd3,resultp1);
  2188. }
  2189. }
  2190. /*
  2191. * Now let's check for trapped underflow case.
  2192. */
  2193. else if (Sgl_iszero_exponent(opnd3) &&
  2194. Is_underflowtrap_enabled()) {
  2195. /* need to normalize results mantissa */
  2196. sign_save = Sgl_signextendedsign(opnd3);
  2197. result_exponent = 0;
  2198. Sgl_leftshiftby1(opnd3);
  2199. Sgl_normalize(opnd3,result_exponent);
  2200. Sgl_set_sign(opnd3,/*using*/sign_save);
  2201. Sgl_setwrapped_exponent(opnd3,result_exponent,
  2202. unfl);
  2203. Sgl_copytoptr(opnd3,dstptr);
  2204. /* inexact = FALSE */
  2205. return(OPC_2E_UNDERFLOWEXCEPTION);
  2206. }
  2207. Sgl_copytoptr(opnd3,dstptr);
  2208. return(NOEXCEPTION);
  2209. }
  2210. /* is denormalized; want to normalize */
  2211. Sgl_clear_signexponent(opnd2);
  2212. Sgl_leftshiftby1(opnd2);
  2213. Sgl_normalize(opnd2,mpy_exponent);
  2214. }
  2215. /* Multiply the first two source mantissas together */
  2216. /*
  2217. * The intermediate result will be kept in tmpres,
  2218. * which needs enough room for 106 bits of mantissa,
  2219. * so lets call it a Double extended.
  2220. */
  2221. Sglext_setzero(tmpresp1,tmpresp2);
  2222. /*
  2223. * Four bits at a time are inspected in each loop, and a
  2224. * simple shift and add multiply algorithm is used.
  2225. */
  2226. for (count = SGL_P-1; count >= 0; count -= 4) {
  2227. Sglext_rightshiftby4(tmpresp1,tmpresp2);
  2228. if (Sbit28(opnd1)) {
  2229. /* Twoword_add should be an ADD followed by 2 ADDC's */
  2230. Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
  2231. }
  2232. if (Sbit29(opnd1)) {
  2233. Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
  2234. }
  2235. if (Sbit30(opnd1)) {
  2236. Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
  2237. }
  2238. if (Sbit31(opnd1)) {
  2239. Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
  2240. }
  2241. Sgl_rightshiftby4(opnd1);
  2242. }
  2243. if (Is_sexthiddenoverflow(tmpresp1)) {
  2244. /* result mantissa >= 2 (mantissa overflow) */
  2245. mpy_exponent++;
  2246. Sglext_rightshiftby4(tmpresp1,tmpresp2);
  2247. } else {
  2248. Sglext_rightshiftby3(tmpresp1,tmpresp2);
  2249. }
  2250. /*
  2251. * Restore the sign of the mpy result which was saved in resultp1.
  2252. * The exponent will continue to be kept in mpy_exponent.
  2253. */
  2254. Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
  2255. /*
  2256. * No rounding is required, since the result of the multiply
  2257. * is exact in the extended format.
  2258. */
  2259. /*
  2260. * Now we are ready to perform the add portion of the operation.
  2261. *
  2262. * The exponents need to be kept as integers for now, since the
  2263. * multiply result might not fit into the exponent field. We
  2264. * can't overflow or underflow because of this yet, since the
  2265. * add could bring the final result back into range.
  2266. */
  2267. add_exponent = Sgl_exponent(opnd3);
  2268. /*
  2269. * Check for denormalized or zero add operand.
  2270. */
  2271. if (add_exponent == 0) {
  2272. /* check for zero */
  2273. if (Sgl_iszero_mantissa(opnd3)) {
  2274. /* right is zero */
  2275. /* Left can't be zero and must be result.
  2276. *
  2277. * The final result is now in tmpres and mpy_exponent,
  2278. * and needs to be rounded and squeezed back into
  2279. * double precision format from double extended.
  2280. */
  2281. result_exponent = mpy_exponent;
  2282. Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
  2283. sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
  2284. goto round;
  2285. }
  2286. /*
  2287. * Neither are zeroes.
  2288. * Adjust exponent and normalize add operand.
  2289. */
  2290. sign_save = Sgl_signextendedsign(opnd3); /* save sign */
  2291. Sgl_clear_signexponent(opnd3);
  2292. Sgl_leftshiftby1(opnd3);
  2293. Sgl_normalize(opnd3,add_exponent);
  2294. Sgl_set_sign(opnd3,sign_save); /* restore sign */
  2295. } else {
  2296. Sgl_clear_exponent_set_hidden(opnd3);
  2297. }
  2298. /*
  2299. * Copy opnd3 to the double extended variable called right.
  2300. */
  2301. Sgl_copyto_sglext(opnd3,rightp1,rightp2);
  2302. /*
  2303. * A zero "save" helps discover equal operands (for later),
  2304. * and is used in swapping operands (if needed).
  2305. */
  2306. Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
  2307. /*
  2308. * Compare magnitude of operands.
  2309. */
  2310. Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
  2311. Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
  2312. if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
  2313. Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
  2314. /*
  2315. * Set the left operand to the larger one by XOR swap.
  2316. * First finish the first word "save".
  2317. */
  2318. Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
  2319. Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
  2320. Sglext_swap_lower(tmpresp2,rightp2);
  2321. /* also setup exponents used in rest of routine */
  2322. diff_exponent = add_exponent - mpy_exponent;
  2323. result_exponent = add_exponent;
  2324. } else {
  2325. /* also setup exponents used in rest of routine */
  2326. diff_exponent = mpy_exponent - add_exponent;
  2327. result_exponent = mpy_exponent;
  2328. }
  2329. /* Invariant: left is not smaller than right. */
  2330. /*
  2331. * Special case alignment of operands that would force alignment
  2332. * beyond the extent of the extension. A further optimization
  2333. * could special case this but only reduces the path length for
  2334. * this infrequent case.
  2335. */
  2336. if (diff_exponent > SGLEXT_THRESHOLD) {
  2337. diff_exponent = SGLEXT_THRESHOLD;
  2338. }
  2339. /* Align right operand by shifting it to the right */
  2340. Sglext_clear_sign(rightp1);
  2341. Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
  2342. /* Treat sum and difference of the operands separately. */
  2343. if ((int)save < 0) {
  2344. /*
  2345. * Difference of the two operands. Overflow can occur if the
  2346. * multiply overflowed. A borrow can occur out of the hidden
  2347. * bit and force a post normalization phase.
  2348. */
  2349. Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
  2350. resultp1,resultp2);
  2351. sign_save = Sgl_signextendedsign(resultp1);
  2352. if (Sgl_iszero_hidden(resultp1)) {
  2353. /* Handle normalization */
  2354. /* A straightforward algorithm would now shift the
  2355. * result and extension left until the hidden bit
  2356. * becomes one. Not all of the extension bits need
  2357. * participate in the shift. Only the two most
  2358. * significant bits (round and guard) are needed.
  2359. * If only a single shift is needed then the guard
  2360. * bit becomes a significant low order bit and the
  2361. * extension must participate in the rounding.
  2362. * If more than a single shift is needed, then all
  2363. * bits to the right of the guard bit are zeros,
  2364. * and the guard bit may or may not be zero. */
  2365. Sglext_leftshiftby1(resultp1,resultp2);
  2366. /* Need to check for a zero result. The sign and
  2367. * exponent fields have already been zeroed. The more
  2368. * efficient test of the full object can be used.
  2369. */
  2370. if (Sglext_iszero(resultp1,resultp2)) {
  2371. /* Must have been "x-x" or "x+(-x)". */
  2372. if (Is_rounding_mode(ROUNDMINUS))
  2373. Sgl_setone_sign(resultp1);
  2374. Sgl_copytoptr(resultp1,dstptr);
  2375. return(NOEXCEPTION);
  2376. }
  2377. result_exponent--;
  2378. /* Look to see if normalization is finished. */
  2379. if (Sgl_isone_hidden(resultp1)) {
  2380. /* No further normalization is needed */
  2381. goto round;
  2382. }
  2383. /* Discover first one bit to determine shift amount.
  2384. * Use a modified binary search. We have already
  2385. * shifted the result one position right and still
  2386. * not found a one so the remainder of the extension
  2387. * must be zero and simplifies rounding. */
  2388. /* Scan bytes */
  2389. while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
  2390. Sglext_leftshiftby8(resultp1,resultp2);
  2391. result_exponent -= 8;
  2392. }
  2393. /* Now narrow it down to the nibble */
  2394. if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
  2395. /* The lower nibble contains the
  2396. * normalizing one */
  2397. Sglext_leftshiftby4(resultp1,resultp2);
  2398. result_exponent -= 4;
  2399. }
  2400. /* Select case where first bit is set (already
  2401. * normalized) otherwise select the proper shift. */
  2402. jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
  2403. if (jumpsize <= 7) switch(jumpsize) {
  2404. case 1:
  2405. Sglext_leftshiftby3(resultp1,resultp2);
  2406. result_exponent -= 3;
  2407. break;
  2408. case 2:
  2409. case 3:
  2410. Sglext_leftshiftby2(resultp1,resultp2);
  2411. result_exponent -= 2;
  2412. break;
  2413. case 4:
  2414. case 5:
  2415. case 6:
  2416. case 7:
  2417. Sglext_leftshiftby1(resultp1,resultp2);
  2418. result_exponent -= 1;
  2419. break;
  2420. }
  2421. } /* end if (hidden...)... */
  2422. /* Fall through and round */
  2423. } /* end if (save < 0)... */
  2424. else {
  2425. /* Add magnitudes */
  2426. Sglext_addition(tmpresp1,tmpresp2,
  2427. rightp1,rightp2, /*to*/resultp1,resultp2);
  2428. sign_save = Sgl_signextendedsign(resultp1);
  2429. if (Sgl_isone_hiddenoverflow(resultp1)) {
  2430. /* Prenormalization required. */
  2431. Sglext_arithrightshiftby1(resultp1,resultp2);
  2432. result_exponent++;
  2433. } /* end if hiddenoverflow... */
  2434. } /* end else ...add magnitudes... */
  2435. /* Round the result. If the extension and lower two words are
  2436. * all zeros, then the result is exact. Otherwise round in the
  2437. * correct direction. Underflow is possible. If a postnormalization
  2438. * is necessary, then the mantissa is all zeros so no shift is needed.
  2439. */
  2440. round:
  2441. if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
  2442. Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
  2443. }
  2444. Sgl_set_sign(resultp1,/*using*/sign_save);
  2445. if (Sglext_isnotzero_mantissap2(resultp2)) {
  2446. inexact = TRUE;
  2447. switch(Rounding_mode()) {
  2448. case ROUNDNEAREST: /* The default. */
  2449. if (Sglext_isone_highp2(resultp2)) {
  2450. /* at least 1/2 ulp */
  2451. if (Sglext_isnotzero_low31p2(resultp2) ||
  2452. Sglext_isone_lowp1(resultp1)) {
  2453. /* either exactly half way and odd or
  2454. * more than 1/2ulp */
  2455. Sgl_increment(resultp1);
  2456. }
  2457. }
  2458. break;
  2459. case ROUNDPLUS:
  2460. if (Sgl_iszero_sign(resultp1)) {
  2461. /* Round up positive results */
  2462. Sgl_increment(resultp1);
  2463. }
  2464. break;
  2465. case ROUNDMINUS:
  2466. if (Sgl_isone_sign(resultp1)) {
  2467. /* Round down negative results */
  2468. Sgl_increment(resultp1);
  2469. }
  2470. case ROUNDZERO:;
  2471. /* truncate is simple */
  2472. } /* end switch... */
  2473. if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
  2474. }
  2475. if (result_exponent >= SGL_INFINITY_EXPONENT) {
  2476. /* Overflow */
  2477. if (Is_overflowtrap_enabled()) {
  2478. /*
  2479. * Adjust bias of result
  2480. */
  2481. Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
  2482. Sgl_copytoptr(resultp1,dstptr);
  2483. if (inexact)
  2484. if (Is_inexacttrap_enabled())
  2485. return (OPC_2E_OVERFLOWEXCEPTION |
  2486. OPC_2E_INEXACTEXCEPTION);
  2487. else Set_inexactflag();
  2488. return (OPC_2E_OVERFLOWEXCEPTION);
  2489. }
  2490. inexact = TRUE;
  2491. Set_overflowflag();
  2492. Sgl_setoverflow(resultp1);
  2493. } else if (result_exponent <= 0) { /* underflow case */
  2494. if (Is_underflowtrap_enabled()) {
  2495. /*
  2496. * Adjust bias of result
  2497. */
  2498. Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
  2499. Sgl_copytoptr(resultp1,dstptr);
  2500. if (inexact)
  2501. if (Is_inexacttrap_enabled())
  2502. return (OPC_2E_UNDERFLOWEXCEPTION |
  2503. OPC_2E_INEXACTEXCEPTION);
  2504. else Set_inexactflag();
  2505. return(OPC_2E_UNDERFLOWEXCEPTION);
  2506. }
  2507. else if (inexact && is_tiny) Set_underflowflag();
  2508. }
  2509. else Sgl_set_exponent(resultp1,result_exponent);
  2510. Sgl_copytoptr(resultp1,dstptr);
  2511. if (inexact)
  2512. if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
  2513. else Set_inexactflag();
  2514. return(NOEXCEPTION);
  2515. }