vector.S

#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* void do_load_up_transact_altivec(struct thread_struct *thread)
 *
 * This is similar to load_up_altivec but for the transactional version of the
 * vector regs.  It doesn't mess with the task MSR or valid flags.
 * Furthermore, VEC laziness is not supported with TM currently.
 */
_GLOBAL(do_load_up_transact_altivec)
	mfmsr	r6
	oris	r5,r6,MSR_VEC@h
	MTMSRD(r5)
	isync

	li	r4,1
	stw	r4,THREAD_USED_VR(r3)

	li	r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
	lvx	v0,r10,r3
	mtvscr	v0
	addi	r10,r3,THREAD_TRANSACT_VRSTATE
	REST_32VRS(0,r4,r10)

	/* Disable VEC again. */
	MTMSRD(r6)
	isync

	blr
#endif
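
/*
 * Illustrative sketch only (assumption, not part of this file): the helper
 * above takes a struct thread_struct pointer in r3, so a hypothetical C-level
 * view of what it does would be roughly:
 *
 *	void load_transact_vec(struct thread_struct *t)
 *	{
 *		t->used_vr = 1;
 *		// restore VSCR from t->transact_vr.vscr, then v0-v31 from
 *		// t->transact_vr.vr[], leaving MSR_VEC exactly as it was
 *	}
 */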

/*
 * Enable use of VMX/Altivec for the caller.
 */
_GLOBAL(vec_enable)
	mfmsr	r3
	oris	r3,r3,MSR_VEC@h
	MTMSRD(r3)
	isync
	blr

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
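
/*
 * Illustrative sketch only (assumption, not part of this file): from C these
 * two entry points form a natural save/restore pair, with prototypes along
 * the lines of
 *
 *	void load_vr_state(struct thread_vr_state *vr);
 *	void store_vr_state(struct thread_vr_state *vr);
 *
 * e.g. (hypothetical caller; VMX must already be enabled in the MSR, for
 * instance via vec_enable above):
 *
 *	store_vr_state(&tsk->thread.vr_state);	// dump v0-v31 + VSCR
 *	...
 *	load_vr_state(&tsk->thread.vr_state);	// bring them back
 */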

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (i.e. no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here; that is done in the normal context
 * switch code.  Note that we could rely on the vrsave value to eventually
 * avoid saving all of the VREGs here...
 */
#ifndef CONFIG_SMP
	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
	toreal(r3)
	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
	PPC_LCMPI	0,r4,0
	beq	1f

	/* Save VMX state to last_task_used_altivec's THREAD struct */
	toreal(r4)
	addi	r4,r4,THREAD
	addi	r6,r4,THREAD_VRSTATE
	SAVE_32VRS(0,r5,r6)
	mfvscr	v0
	li	r10,VRSTATE_VSCR
	stvx	v0,r10,r6
	/* Disable VMX for last_task_used_altivec */
	PPC_LL	r5,PT_REGS(r4)
	toreal(r5)
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r10,MSR_VEC@h
	andc	r4,r4,r10
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	subi	r4,r5,THREAD		/* Back to 'current' */
	fromreal(r4)
	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */

	/* restore registers and return */
	blr
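
/*
 * Illustrative sketch only (assumption, not part of this file): the VRSAVE
 * "hack" above is the assembly equivalent of roughly this C logic, applied
 * when the AltiVec-unavailable trap brings us here:
 *
 *	unsigned long vrsave = mfspr(SPRN_VRSAVE);
 *	if (vrsave == 0)			// broken app never set VRSAVE
 *		mtspr(SPRN_VRSAVE, ~0UL);	// pretend every VR is live
 */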

_GLOBAL(giveup_altivec_notask)
	mfmsr	r3
	andis.	r4,r3,MSR_VEC@h
	bnelr				/* Already enabled? */
	oris	r3,r3,MSR_VEC@h
	SYNC
	MTMSRD(r3)			/* enable use of VMX now */
	isync
	blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	SYNC
	MTMSRD(r5)			/* enable use of VMX now */
	isync
	PPC_LCMPI	0,r3,0
	beqlr				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	PPC_LCMPI	0,r5,0
	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
	blr
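
/*
 * Illustrative sketch only (assumption, not part of this file): the context
 * switch path is the typical caller of giveup_altivec(); it gives up the unit
 * whenever the outgoing task was using it, roughly:
 *
 *	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
 *		giveup_altivec(prev);	// saves VRs + VSCR, clears MSR_VEC
 */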

#ifdef CONFIG_VSX
#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the FP and AltiVec load-up code, but first check to see
 * whether that has already been done.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifndef CONFIG_SMP
	ld	r3,last_task_used_vsx@got(r2)
	ld	r4,0(r3)
	cmpdi	0,r4,0
	beq	1f
	/* Disable VSX for last_task_used_vsx */
	addi	r4,r4,THREAD
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VSX@h
	andc	r6,r4,r6
	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
#ifndef CONFIG_SMP
	/* Update last_task_used_vsx to 'current' */
	ld	r4,PACACURRENT(r13)
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
_GLOBAL(__giveup_vsx)
	mfmsr	r5
	oris	r5,r5,MSR_VSX@h
	mtmsrd	r5			/* enable use of VSX now */
	isync

	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	ld	r4,last_task_used_vsx@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_VSX */
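
/*
 * Illustrative sketch only (assumption, not part of this file): because the
 * VSX register file is the union of the FP and VMX register files, a C caller
 * that wants to give up VSX typically drops all three facilities for the
 * outgoing task, roughly:
 *
 *	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX)) {
 *		giveup_fpu(prev);	// low halves of VSR0-VSR31
 *		giveup_altivec(prev);	// VSR32-VSR63
 *		__giveup_vsx(prev);	// only clears MSR_VSX for prev
 *	}
 */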

/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)

#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)

#endif
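
/*
 * Illustrative sketch only (assumption, not part of this file): "called with
 * preempt disabled" means a C caller brackets these helpers so the scalar FP
 * registers they borrow cannot be switched out mid-operation, e.g.
 *
 *	preempt_disable();
 *	vaddfp(dst, srca, srcb);	// dst/srca/srcb: pointers to 128-bit
 *					// (4 x float) vectors; the exact
 *					// prototype is assumed here
 *	preempt_enable();
 */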

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
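
/*
 * Illustrative sketch only (assumption, not part of this file): each
 * arithmetic helper here has the same shape -- save LR in r12, call fpenable,
 * loop over the four 32-bit lanes with scalar FP instructions, then
 * tail-branch to fpdisable.  In C terms vaddfp computes roughly:
 *
 *	void vaddfp_equiv(float *dst, const float *a, const float *b)
 *	{
 *		int i;
 *
 *		for (i = 0; i < 4; i++)		// one 128-bit vector = 4 lanes
 *			dst[i] = a[i] + b[i];
 *	}
 *
 * vsubfp, vmaddfp and vnmsubfp differ only in the per-lane operation.
 */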

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0			/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
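
/*
 * Illustrative sketch only (assumption, not part of this file): the loop
 * above implements Newton-Raphson refinement of r ~ 1/sqrt(s).  For
 * f(r) = 1/r^2 - s, one Newton step is r' = r + (r/2) * (1 - s*r*r), and the
 * code applies it twice to the frsqrte estimate; in C (hypothetical helper):
 *
 *	float rsqrte_refine(float s, float r)	// r = initial frsqrte estimate
 *	{
 *		int i;
 *
 *		for (i = 0; i < 2; i++)
 *			r = r + 0.5f * r * (1.0f - s * r * r);
 *		return r;
 *	}
 */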