slogn.S 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. |
  2. | slogn.sa 3.1 12/10/90
  3. |
  4. | slogn computes the natural logarithm of an
  5. | input value. slognd does the same except the input value is a
  6. | denormalized number. slognp1 computes log(1+X), and slognp1d
  7. | computes log(1+X) for denormalized X.
  8. |
  9. | Input: Double-extended value in memory location pointed to by address
  10. | register a0.
  11. |
  12. | Output: log(X) or log(1+X) returned in floating-point register Fp0.
  13. |
  14. | Accuracy and Monotonicity: The returned result is within 2 ulps in
  15. | 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
  16. | result is subsequently rounded to double precision. The
  17. | result is provably monotonic in double precision.
  18. |
  19. | Speed: The program slogn takes approximately 190 cycles for input
  20. | argument X such that |X-1| >= 1/16, which is the usual
  21. | situation. For those arguments, slognp1 takes approximately
  22. | 210 cycles. For the less common arguments, the program will
  23. | run no worse than 10% slower.
  24. |
  25. | Algorithm:
  26. | LOGN:
  27. | Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
  28. | u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
  29. |
  30. | Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
  31. | significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
  32. | 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
  33. |
  34. | Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
  35. | log(1+u) = poly.
  36. |
  37. | Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
  38. | by k*log(2) + (log(F) + poly). The values of log(F) are calculated
  39. | beforehand and stored in the program.
  40. |
  41. | lognp1:
  42. | Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
  43. | u where u = 2X/(2+X). Otherwise, move on to Step 2.
  44. |
  45. | Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
  46. | of the algorithm for LOGN and compute log(1+X) as
  47. | k*log(2) + log(F) + poly where poly approximates log(1+u),
  48. | u = (Y-F)/F.
  49. |
  50. | Implementation Notes:
  51. | Note 1. There are 64 different possible values for F, thus 64 log(F)'s
  52. | need to be tabulated. Moreover, the values of 1/F are also
  53. | tabulated so that the division in (Y-F)/F can be performed by a
  54. | multiplication.
  55. |
  56. | Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
  57. | Y-F has to be calculated carefully when 1/2 <= X < 3/2.
  58. |
  59. | Note 3. To fully exploit the pipeline, polynomials are usually separated
  60. | into two parts evaluated independently before being added up.
  61. |
  62. | Copyright (C) Motorola, Inc. 1990
  63. | All Rights Reserved
  64. |
  65. | For details on the license for this file, please see the
  66. | file, README, in this same directory.
  67. |slogn idnt 2,1 | Motorola 040 Floating Point Software Package
  68. |section 8
  69. #include "fpsp.h"
  70. BOUNDS1: .long 0x3FFEF07D,0x3FFF8841
  71. BOUNDS2: .long 0x3FFE8000,0x3FFFC000
  72. LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
  73. one: .long 0x3F800000
  74. zero: .long 0x00000000
  75. infty: .long 0x7F800000
  76. negone: .long 0xBF800000
  77. LOGA6: .long 0x3FC2499A,0xB5E4040B
  78. LOGA5: .long 0xBFC555B5,0x848CB7DB
  79. LOGA4: .long 0x3FC99999,0x987D8730
  80. LOGA3: .long 0xBFCFFFFF,0xFF6F7E97
  81. LOGA2: .long 0x3FD55555,0x555555a4
  82. LOGA1: .long 0xBFE00000,0x00000008
  83. LOGB5: .long 0x3F175496,0xADD7DAD6
  84. LOGB4: .long 0x3F3C71C2,0xFE80C7E0
  85. LOGB3: .long 0x3F624924,0x928BCCFF
  86. LOGB2: .long 0x3F899999,0x999995EC
  87. LOGB1: .long 0x3FB55555,0x55555555
  88. TWO: .long 0x40000000,0x00000000
  89. LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
  90. LOGTBL:
  91. .long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
  92. .long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
  93. .long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
  94. .long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
  95. .long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
  96. .long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
  97. .long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
  98. .long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
  99. .long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
  100. .long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
  101. .long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
  102. .long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
  103. .long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
  104. .long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
  105. .long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
  106. .long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
  107. .long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
  108. .long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
  109. .long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
  110. .long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
  111. .long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
  112. .long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
  113. .long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
  114. .long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
  115. .long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
  116. .long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
  117. .long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
  118. .long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
  119. .long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
  120. .long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
  121. .long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
  122. .long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
  123. .long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
  124. .long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
  125. .long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
  126. .long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
  127. .long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
  128. .long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
  129. .long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
  130. .long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
  131. .long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
  132. .long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
  133. .long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
  134. .long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
  135. .long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
  136. .long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
  137. .long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
  138. .long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
  139. .long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
  140. .long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
  141. .long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
  142. .long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
  143. .long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
  144. .long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
  145. .long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
  146. .long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
  147. .long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
  148. .long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
  149. .long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
  150. .long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
  151. .long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
  152. .long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
  153. .long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
  154. .long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
  155. .long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
  156. .long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
  157. .long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
  158. .long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
  159. .long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
  160. .long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
  161. .long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
  162. .long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
  163. .long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
  164. .long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
  165. .long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
  166. .long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
  167. .long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
  168. .long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
  169. .long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
  170. .long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
  171. .long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
  172. .long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
  173. .long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
  174. .long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
  175. .long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
  176. .long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
  177. .long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
  178. .long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
  179. .long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
  180. .long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
  181. .long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
  182. .long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
  183. .long 0x3FFE0000,0x94458094,0x45809446,0x00000000
  184. .long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
  185. .long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
  186. .long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
  187. .long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
  188. .long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
  189. .long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
  190. .long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
  191. .long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
  192. .long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
  193. .long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
  194. .long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
  195. .long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
  196. .long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
  197. .long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
  198. .long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
  199. .long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
  200. .long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
  201. .long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
  202. .long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
  203. .long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
  204. .long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
  205. .long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
  206. .long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
  207. .long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
  208. .long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
  209. .long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
  210. .long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
  211. .long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
  212. .long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
  213. .long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
  214. .long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
  215. .long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
  216. .long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
  217. .long 0x3FFE0000,0x80808080,0x80808081,0x00000000
  218. .long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
  219. .set ADJK,L_SCR1
  220. .set X,FP_SCR1
  221. .set XDCARE,X+2
  222. .set XFRAC,X+4
  223. .set F,FP_SCR2
  224. .set FFRAC,F+4
  225. .set KLOG2,FP_SCR3
  226. .set SAVEU,FP_SCR4
  227. | xref t_frcinx
  228. |xref t_extdnrm
  229. |xref t_operr
  230. |xref t_dz
  231. .global slognd
  232. slognd:
  233. |--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
  234. movel #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0
  235. |----normalize the input value by left shifting k bits (k to be determined
  236. |----below), adjusting exponent and storing -k to ADJK
  237. |----the value TWOTO100 is no longer needed.
  238. |----Note that this code assumes the denormalized input is NON-ZERO.
  239. moveml %d2-%d7,-(%a7) | ...save some registers
  240. movel #0x00000000,%d3 | ...D3 is exponent of smallest norm. #
  241. movel 4(%a0),%d4
  242. movel 8(%a0),%d5 | ...(D4,D5) is (Hi_X,Lo_X)
  243. clrl %d2 | ...D2 used for holding K
  244. tstl %d4
  245. bnes HiX_not0
  246. HiX_0:
  247. movel %d5,%d4
  248. clrl %d5
  249. movel #32,%d2
  250. clrl %d6
  251. bfffo %d4{#0:#32},%d6
  252. lsll %d6,%d4
  253. addl %d6,%d2 | ...(D3,D4,D5) is normalized
  254. movel %d3,X(%a6)
  255. movel %d4,XFRAC(%a6)
  256. movel %d5,XFRAC+4(%a6)
  257. negl %d2
  258. movel %d2,ADJK(%a6)
  259. fmovex X(%a6),%fp0
  260. moveml (%a7)+,%d2-%d7 | ...restore registers
  261. lea X(%a6),%a0
  262. bras LOGBGN | ...begin regular log(X)
  263. HiX_not0:
  264. clrl %d6
  265. bfffo %d4{#0:#32},%d6 | ...find first 1
  266. movel %d6,%d2 | ...get k
  267. lsll %d6,%d4
  268. movel %d5,%d7 | ...a copy of D5
  269. lsll %d6,%d5
  270. negl %d6
  271. addil #32,%d6
  272. lsrl %d6,%d7
  273. orl %d7,%d4 | ...(D3,D4,D5) normalized
  274. movel %d3,X(%a6)
  275. movel %d4,XFRAC(%a6)
  276. movel %d5,XFRAC+4(%a6)
  277. negl %d2
  278. movel %d2,ADJK(%a6)
  279. fmovex X(%a6),%fp0
  280. moveml (%a7)+,%d2-%d7 | ...restore registers
  281. lea X(%a6),%a0
  282. bras LOGBGN | ...begin regular log(X)
  283. .global slogn
  284. slogn:
  285. |--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
  286. fmovex (%a0),%fp0 | ...LOAD INPUT
  287. movel #0x00000000,ADJK(%a6)
  288. LOGBGN:
  289. |--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
  290. |--A FINITE, NON-ZERO, NORMALIZED NUMBER.
  291. movel (%a0),%d0
  292. movew 4(%a0),%d0
  293. movel (%a0),X(%a6)
  294. movel 4(%a0),X+4(%a6)
  295. movel 8(%a0),X+8(%a6)
  296. cmpil #0,%d0 | ...CHECK IF X IS NEGATIVE
  297. blt LOGNEG | ...LOG OF NEGATIVE ARGUMENT IS INVALID
  298. cmp2l BOUNDS1,%d0 | ...X IS POSITIVE, CHECK IF X IS NEAR 1
  299. bcc LOGNEAR1 | ...BOUNDS IS ROUGHLY [15/16, 17/16]
  300. LOGMAIN:
  301. |--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
  302. |--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
  303. |--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
  304. |--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
  305. |-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
  306. |--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
  307. |--LOG(1+U) CAN BE VERY EFFICIENT.
  308. |--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
  309. |--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
  310. |--GET K, Y, F, AND ADDRESS OF 1/F.
  311. asrl #8,%d0
  312. asrl #8,%d0 | ...SHIFTED 16 BITS, BIASED EXPO. OF X
  313. subil #0x3FFF,%d0 | ...THIS IS K
  314. addl ADJK(%a6),%d0 | ...ADJUST K, ORIGINAL INPUT MAY BE DENORM.
  315. lea LOGTBL,%a0 | ...BASE ADDRESS OF 1/F AND LOG(F)
  316. fmovel %d0,%fp1 | ...CONVERT K TO FLOATING-POINT FORMAT
  317. |--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
  318. movel #0x3FFF0000,X(%a6) | ...X IS NOW Y, I.E. 2^(-K)*X
  319. movel XFRAC(%a6),FFRAC(%a6)
  320. andil #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
  321. oril #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
  322. movel FFRAC(%a6),%d0 | ...READY TO GET ADDRESS OF 1/F
  323. andil #0x7E000000,%d0
  324. asrl #8,%d0
  325. asrl #8,%d0
  326. asrl #4,%d0 | ...SHIFTED 20, D0 IS THE DISPLACEMENT
  327. addal %d0,%a0 | ...A0 IS THE ADDRESS FOR 1/F
  328. fmovex X(%a6),%fp0
  329. movel #0x3fff0000,F(%a6)
  330. clrl F+8(%a6)
  331. fsubx F(%a6),%fp0 | ...Y-F
  332. fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 WHILE FP0 IS NOT READY
  333. |--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
  334. |--REGISTERS SAVED: FPCR, FP1, FP2
  335. LP1CONT1:
  336. |--AN RE-ENTRY POINT FOR LOGNP1
  337. fmulx (%a0),%fp0 | ...FP0 IS U = (Y-F)/F
  338. fmulx LOGOF2,%fp1 | ...GET K*LOG2 WHILE FP0 IS NOT READY
  339. fmovex %fp0,%fp2
  340. fmulx %fp2,%fp2 | ...FP2 IS V=U*U
  341. fmovex %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1
  342. |--LOG(1+U) IS APPROXIMATED BY
  343. |--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
  344. |--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
  345. fmovex %fp2,%fp3
  346. fmovex %fp2,%fp1
  347. fmuld LOGA6,%fp1 | ...V*A6
  348. fmuld LOGA5,%fp2 | ...V*A5
  349. faddd LOGA4,%fp1 | ...A4+V*A6
  350. faddd LOGA3,%fp2 | ...A3+V*A5
  351. fmulx %fp3,%fp1 | ...V*(A4+V*A6)
  352. fmulx %fp3,%fp2 | ...V*(A3+V*A5)
  353. faddd LOGA2,%fp1 | ...A2+V*(A4+V*A6)
  354. faddd LOGA1,%fp2 | ...A1+V*(A3+V*A5)
  355. fmulx %fp3,%fp1 | ...V*(A2+V*(A4+V*A6))
  356. addal #16,%a0 | ...ADDRESS OF LOG(F)
  357. fmulx %fp3,%fp2 | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
  358. fmulx %fp0,%fp1 | ...U*V*(A2+V*(A4+V*A6))
  359. faddx %fp2,%fp0 | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
  360. faddx (%a0),%fp1 | ...LOG(F)+U*V*(A2+V*(A4+V*A6))
  361. fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...RESTORE FP2
  362. faddx %fp1,%fp0 | ...FP0 IS LOG(F) + LOG(1+U)
  363. fmovel %d1,%fpcr
  364. faddx KLOG2(%a6),%fp0 | ...FINAL ADD
  365. bra t_frcinx
  366. LOGNEAR1:
  367. |--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
  368. fmovex %fp0,%fp1
  369. fsubs one,%fp1 | ...FP1 IS X-1
  370. fadds one,%fp0 | ...FP0 IS X+1
  371. faddx %fp1,%fp1 | ...FP1 IS 2(X-1)
  372. |--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
  373. |--IN U, U = 2(X-1)/(X+1) = FP1/FP0
  374. LP1CONT2:
  375. |--THIS IS AN RE-ENTRY POINT FOR LOGNP1
  376. fdivx %fp0,%fp1 | ...FP1 IS U
  377. fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2
  378. |--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
  379. |--LET V=U*U, W=V*V, CALCULATE
  380. |--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
  381. |--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
  382. fmovex %fp1,%fp0
  383. fmulx %fp0,%fp0 | ...FP0 IS V
  384. fmovex %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
  385. fmovex %fp0,%fp1
  386. fmulx %fp1,%fp1 | ...FP1 IS W
  387. fmoved LOGB5,%fp3
  388. fmoved LOGB4,%fp2
  389. fmulx %fp1,%fp3 | ...W*B5
  390. fmulx %fp1,%fp2 | ...W*B4
  391. faddd LOGB3,%fp3 | ...B3+W*B5
  392. faddd LOGB2,%fp2 | ...B2+W*B4
  393. fmulx %fp3,%fp1 | ...W*(B3+W*B5), FP3 RELEASED
  394. fmulx %fp0,%fp2 | ...V*(B2+W*B4)
  395. faddd LOGB1,%fp1 | ...B1+W*(B3+W*B5)
  396. fmulx SAVEU(%a6),%fp0 | ...FP0 IS U*V
  397. faddx %fp2,%fp1 | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
  398. fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED
  399. fmulx %fp1,%fp0 | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
  400. fmovel %d1,%fpcr
  401. faddx SAVEU(%a6),%fp0
  402. bra t_frcinx
  403. rts
  404. LOGNEG:
  405. |--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
  406. bra t_operr
  407. .global slognp1d
  408. slognp1d:
  409. |--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
  410. | Simply return the denorm
  411. bra t_extdnrm
  412. .global slognp1
  413. slognp1:
  414. |--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
  415. fmovex (%a0),%fp0 | ...LOAD INPUT
  416. fabsx %fp0 |test magnitude
  417. fcmpx LTHOLD,%fp0 |compare with min threshold
  418. fbgt LP1REAL |if greater, continue
  419. fmovel #0,%fpsr |clr N flag from compare
  420. fmovel %d1,%fpcr
  421. fmovex (%a0),%fp0 |return signed argument
  422. bra t_frcinx
  423. LP1REAL:
  424. fmovex (%a0),%fp0 | ...LOAD INPUT
  425. movel #0x00000000,ADJK(%a6)
  426. fmovex %fp0,%fp1 | ...FP1 IS INPUT Z
  427. fadds one,%fp0 | ...X := ROUND(1+Z)
  428. fmovex %fp0,X(%a6)
  429. movew XFRAC(%a6),XDCARE(%a6)
  430. movel X(%a6),%d0
  431. cmpil #0,%d0
  432. ble LP1NEG0 | ...LOG OF ZERO OR -VE
  433. cmp2l BOUNDS2,%d0
  434. bcs LOGMAIN | ...BOUNDS2 IS [1/2,3/2]
  435. |--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
  436. |--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
  437. |--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
  438. LP1NEAR1:
  439. |--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
  440. cmp2l BOUNDS1,%d0
  441. bcss LP1CARE
  442. LP1ONE16:
  443. |--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
  444. |--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
  445. faddx %fp1,%fp1 | ...FP1 IS 2Z
  446. fadds one,%fp0 | ...FP0 IS 1+X
  447. |--U = FP1/FP0
  448. bra LP1CONT2
  449. LP1CARE:
  450. |--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
  451. |--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
  452. |--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
  453. |--THERE ARE ONLY TWO CASES.
  454. |--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
  455. |--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
  456. |--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
  457. |--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
  458. movel XFRAC(%a6),FFRAC(%a6)
  459. andil #0xFE000000,FFRAC(%a6)
  460. oril #0x01000000,FFRAC(%a6) | ...F OBTAINED
  461. cmpil #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1
  462. bges KISZERO
  463. KISNEG1:
  464. fmoves TWO,%fp0
  465. movel #0x3fff0000,F(%a6)
  466. clrl F+8(%a6)
  467. fsubx F(%a6),%fp0 | ...2-F
  468. movel FFRAC(%a6),%d0
  469. andil #0x7E000000,%d0
  470. asrl #8,%d0
  471. asrl #8,%d0
  472. asrl #4,%d0 | ...D0 CONTAINS DISPLACEMENT FOR 1/F
  473. faddx %fp1,%fp1 | ...GET 2Z
  474. fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2
  475. faddx %fp1,%fp0 | ...FP0 IS Y-F = (2-F)+2Z
  476. lea LOGTBL,%a0 | ...A0 IS ADDRESS OF 1/F
  477. addal %d0,%a0
  478. fmoves negone,%fp1 | ...FP1 IS K = -1
  479. bra LP1CONT1
  480. KISZERO:
  481. fmoves one,%fp0
  482. movel #0x3fff0000,F(%a6)
  483. clrl F+8(%a6)
  484. fsubx F(%a6),%fp0 | ...1-F
  485. movel FFRAC(%a6),%d0
  486. andil #0x7E000000,%d0
  487. asrl #8,%d0
  488. asrl #8,%d0
  489. asrl #4,%d0
  490. faddx %fp1,%fp0 | ...FP0 IS Y-F
  491. fmovemx %fp2-%fp2/%fp3,-(%sp) | ...FP2 SAVED
  492. lea LOGTBL,%a0
  493. addal %d0,%a0 | ...A0 IS ADDRESS OF 1/F
  494. fmoves zero,%fp1 | ...FP1 IS K = 0
  495. bra LP1CONT1
  496. LP1NEG0:
  497. |--FPCR SAVED. D0 IS X IN COMPACT FORM.
  498. cmpil #0,%d0
  499. blts LP1NEG
  500. LP1ZERO:
  501. fmoves negone,%fp0
  502. fmovel %d1,%fpcr
  503. bra t_dz
  504. LP1NEG:
  505. fmoves zero,%fp0
  506. fmovel %d1,%fpcr
  507. bra t_operr
  508. |end