vecemu.c

/*
 * Routines to emulate some Altivec/VMX instructions, specifically
 * those that can trap when given denormalized operands in Java mode.
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/uaccess.h>

/* Functions in vector.S */
extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
extern void vrefp(vector128 *dst, vector128 *src);
extern void vrsqrtefp(vector128 *dst, vector128 *src);
extern void vexptep(vector128 *dst, vector128 *src);
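
/*
 * Table of 2^(i/8) for i = 0..7, scaled by 2^23 and rounded to the
 * nearest integer (e.g. exp2s[4] = 0xb504f3 ~= sqrt(2) * 2^23).
 */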
static unsigned int exp2s[8] = {
	0x800000,
	0x8b95c2,
	0x9837f0,
	0xa5fed7,
	0xb504f3,
	0xc5672a,
	0xd744fd,
	0xeac0c7
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 */
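/*
 * Worked example: eexp2(0x40000000), i.e. x = 2.0f, takes the exp > 0
 * path below: pwr becomes 0x1000000 (2.0 in 9.23 fixed point), the
 * table lookup gives exp2s[0] = 0x800000, the interpolation term comes
 * out as 0, and the result is 0x800000 + (128 << 23) = 0x40800000,
 * i.e. 4.0f.
 */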
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000)? 0: 0x7f800000;	/* 0 or +Inf */
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */

	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/* extract integer part, which becomes exponent part of result */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;
	if (exp < -23)
		return 0;

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/* linear interpolation using remaining 20 bits of fraction */
	asm("mulhwu %0,%1,%2" : "=r" (frac)
	    : "r" (pwr << 12), "r" (0x172b83ff));
	asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
	mant += frac;

	if (exp >= 0)
		return mant + (exp << 23);

	/* denormalized result */
	exp = -exp;
	mant += 1 << (exp - 1);
	return mant >> exp;
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 */
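/*
 * The function accumulates log_2(x) as a signed 9.23 fixed-point value
 * in `exp': the integer part comes from the IEEE exponent, and the
 * fraction is built up by factoring 2^0.5, 2^0.25 and 2^0.125 out of
 * the mantissa, plus a final linear interpolation.  The last block
 * converts that fixed-point value back to IEEE single format.
 * For example, elog2(0x40000000) (x = 2.0f) returns 0x3f800000 (1.0f).
 */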
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			s |= 0x400000;	/* turn NaN into QNaN */
		return s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */

	if (exp == 0) {
		/* denormalized */
		asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
		mant <<= lz - 8;
		exp = (-118 - lz) << 23;
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	if (mant >= 0xb504f3) {		/* 2^0.5 * 2^23 */
		exp |= 0x400000;	/* 0.5 * 2^23 */
		asm("mulhwu %0,%1,%2" : "=r" (mant)
		    : "r" (mant), "r" (0xb504f334));	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {		/* 2^0.25 * 2^23 */
		exp |= 0x200000;	/* 0.25 * 2^23 */
		asm("mulhwu %0,%1,%2" : "=r" (mant)
		    : "r" (mant), "r" (0xd744fccb));	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {		/* 2^0.125 * 2^23 */
		exp |= 0x100000;	/* 0.125 * 2^23 */
		asm("mulhwu %0,%1,%2" : "=r" (mant)
		    : "r" (mant), "r" (0xeac0c6e8));	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {		/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		asm("mulhwu %0,%1,%2" : "=r" (frac)
		    : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
		exp += frac;
	}
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
		lz = 8 - lz;
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}

#define VSCR_SAT	1
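
/*
 * Convert the single-precision value x, scaled by 2^scale, to a signed
 * 32-bit integer, rounding towards zero, saturating on overflow and
 * setting VSCR_SAT in *vscrp (except when the result is exactly -2^31),
 * as the vctsxs emulation below needs.
 */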
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	int exp, mant;

	exp = (x >> 23) & 0xff;
	mant = x & 0x7fffff;
	if (exp == 255 && mant != 0)
		return 0;		/* NaN -> 0 */
	exp = exp - 127 + scale;
	if (exp < 0)
		return 0;		/* round towards zero */
	if (exp >= 31) {
		/* saturate, unless the result would be -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return (x & 0x80000000)? 0x80000000: 0x7fffffff;
	}
	mant |= 0x800000;
	mant = (mant << 7) >> (30 - exp);
	return (x & 0x80000000)? -mant: mant;
}
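
/*
 * Same conversion for the unsigned case (vctuxs): negative inputs
 * saturate to 0 and values >= 2^32 saturate to 0xffffffff, with
 * VSCR_SAT set in *vscrp in both cases.
 */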
static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
{
	int exp;
	unsigned int mant;

	exp = (x >> 23) & 0xff;
	mant = x & 0x7fffff;
	if (exp == 255 && mant != 0)
		return 0;		/* NaN -> 0 */
	exp = exp - 127 + scale;
	if (exp < 0)
		return 0;		/* round towards zero */
	if (x & 0x80000000) {
		/* negative => saturate to 0 */
		*vscrp |= VSCR_SAT;
		return 0;
	}
	if (exp >= 32) {
		/* saturate */
		*vscrp |= VSCR_SAT;
		return 0xffffffff;
	}
	mant |= 0x800000;
	mant = (mant << 8) >> (31 - exp);
	return mant;
}

/* Round to floating integer, towards 0 */
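/* e.g. rfiz(0x3fc00000) (1.5f) masks off the fraction bits, giving 0x3f800000 (1.0f) */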
static unsigned int rfiz(unsigned int x)
{
	int exp;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < 0)
		return x & 0x80000000;	/* |x| < 1.0 rounds to 0 */
	return x & ~(0x7fffff >> exp);
}

/* Round to floating integer, towards +/- Inf */
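/* e.g. rfii(0x3fc00000) (1.5f) adds the mask 0x7fffff before truncating, giving 0x40000000 (2.0f) */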
static unsigned int rfii(unsigned int x)
{
	int exp, mask;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if ((x & 0x7fffffff) == 0)
		return x;		/* +/-0 -> +/-0 */
	if (exp < 0)
		/* 0 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	mask = 0x7fffff >> exp;
	/* mantissa overflows into exponent - that's OK,
	   it can't overflow into the sign bit */
	return (x + mask) & ~mask;
}

/* Round to floating integer, to nearest */
static unsigned int rfin(unsigned int x)
{
	int exp, half;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1)
		/* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	half = 0x400000 >> exp;
	/* add 0.5 to the magnitude and chop off the fraction bits */
	return (x + half) & ~(0x7fffff >> exp);
}
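
/*
 * Emulate the Altivec instruction at regs->nip.  Returns 0 on success,
 * -EFAULT if the instruction word cannot be read, or -EINVAL if it is
 * not one of the instructions handled here.  Note that regs->nip is
 * not advanced; the caller (presumably the Altivec assist exception
 * handler) is expected to step past the instruction on success.
 */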
int emulate_altivec(struct pt_regs *regs)
{
	unsigned int instr, i;
	unsigned int va, vb, vc, vd;
	vector128 *vrs;

	if (get_user(instr, (unsigned int __user *) regs->nip))
		return -EFAULT;
	if ((instr >> 26) != 4)
		return -EINVAL;		/* not an altivec instruction */
	vd = (instr >> 21) & 0x1f;
	va = (instr >> 16) & 0x1f;
	vb = (instr >> 11) & 0x1f;
	vc = (instr >> 6) & 0x1f;

	vrs = current->thread.vr_state.vr;
	switch (instr & 0x3f) {
	case 10:
		switch (vc) {
		case 0:		/* vaddfp */
			vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
			break;
		case 1:		/* vsubfp */
			vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
			break;
		case 4:		/* vrefp */
			vrefp(&vrs[vd], &vrs[vb]);
			break;
		case 5:		/* vrsqrtefp */
			vrsqrtefp(&vrs[vd], &vrs[vb]);
			break;
		case 6:		/* vexptefp */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
			break;
		case 7:		/* vlogefp */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = elog2(vrs[vb].u[i]);
			break;
		case 8:		/* vrfin */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = rfin(vrs[vb].u[i]);
			break;
		case 9:		/* vrfiz */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
			break;
		case 10:	/* vrfip */
			for (i = 0; i < 4; ++i) {
				u32 x = vrs[vb].u[i];
				x = (x & 0x80000000)? rfiz(x): rfii(x);
				vrs[vd].u[i] = x;
			}
			break;
		case 11:	/* vrfim */
			for (i = 0; i < 4; ++i) {
				u32 x = vrs[vb].u[i];
				x = (x & 0x80000000)? rfii(x): rfiz(x);
				vrs[vd].u[i] = x;
			}
			break;
		case 14:	/* vctuxs */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
					&current->thread.vr_state.vscr.u[3]);
			break;
		case 15:	/* vctsxs */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
					&current->thread.vr_state.vscr.u[3]);
			break;
		default:
			return -EINVAL;
		}
		break;
	case 46:	/* vmaddfp */
		vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
		break;
	case 47:	/* vnmsubfp */
		vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
		break;
	default:
		return -EINVAL;
	}
	return 0;
}