usercopy.S 8.2 KB


  1. /*
  2. * arch/xtensa/lib/usercopy.S
  3. *
  4. * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
  5. *
  6. * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
  7. * It needs to remain separate and distinct. The hal files are part
  8. * of the Xtensa link-time HAL, and those files may differ per
  9. * processor configuration. Patching the kernel for another
  10. * processor configuration includes replacing the hal files, and we
  11. * could lose the special functionality for accessing user-space
  12. * memory during such a patch. We sacrifice a little code space here
  13. * in favor of simpler code maintenance.
  14. *
  15. * This file is subject to the terms and conditions of the GNU General
  16. * Public License. See the file "COPYING" in the main directory of
  17. * this archive for more details.
  18. *
  19. * Copyright (C) 2002 Tensilica Inc.
  20. */
  21. /*
  22. * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
  23. *
  24. * The returned value is the number of bytes not copied. Implies zero
  25. * is success.
  26. *
  27. * The general case algorithm is as follows:
  28. * If the destination and source are both aligned,
  29. * do 16B chunks with a loop, and then finish up with
  30. * 8B, 4B, 2B, and 1B copies conditional on the length.
  31. * If destination is aligned and source unaligned,
  32. * do the same, but use SRC to align the source data.
  33. * If destination is unaligned, align it by conditionally
  34. * copying 1B and 2B and then retest.
  35. * This code tries to use fall-through branches for the common
  36. * case of aligned destinations (except for the branches to
  37. * the alignment label).
  38. *
  39. * Register use:
  40. * a0/ return address
  41. * a1/ stack pointer
  42. * a2/ return value
  43. * a3/ src
  44. * a4/ length
  45. * a5/ dst
  46. * a6/ tmp
  47. * a7/ tmp
  48. * a8/ tmp
  49. * a9/ tmp
  50. * a10/ tmp
  51. * a11/ original length (a12/ tmp: loop end address, used only when XCHAL_HAVE_LOOPS is 0)
  52. */
  53. #include <variant/core.h>
  54. #ifdef __XTENSA_EB__ /* big-endian configuration */
  55. #define ALIGN(R, W0, W1) src R, W0, W1 /* R <- one aligned word built from the consecutive source words W0, W1 */
  56. #define SSA8(R) ssa8b R /* set the shift amount used by src from the byte offset held in R */
  57. #else /* little-endian configuration: same contracts, src operands swapped */
  58. #define ALIGN(R, W0, W1) src R, W1, W0
  59. #define SSA8(R) ssa8l R
  60. #endif /* __XTENSA_EB__ */
  61. /* Load or store instructions that may cause exceptions use the EX macro. EX(insn, reg1, reg2, offset, handler) emits the instruction under local label 9, records the pair (address of that instruction, handler) as two words in the __ex_table section, and then switches back to the previous section. */
  62. #define EX(insn,reg1,reg2,offset,handler) \
  63. 9: insn reg1, reg2, offset; \
  64. .section __ex_table, "a"; \
  65. .word 9b, handler; \
  66. .previous
  67. .text
  68. .align 4
  69. .global __xtensa_copy_user
  70. .type __xtensa_copy_user,@function
  71. __xtensa_copy_user: # size_t __xtensa_copy_user(void *dst, const void *src, size_t len); returns the number of bytes not copied
  72. entry sp, 16 # minimal stack frame
  73. # a2/ dst, a3/ src, a4/ len
  74. mov a5, a2 # a5 = running dst; a2 keeps the original dst until it is overwritten with the return value
  75. mov a11, a4 # preserve original len for error case
  76. .Lcommon: # dispatch on the alignment of dst (a2 still equals a5 here)
  77. bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2
  78. bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4
  79. .Ldstaligned: # return here from .Ldst1mod2/.Ldst2mod4 once dst is word-aligned
  80. srli a7, a4, 4 # number of loop iterations with 16B
  81. # per iteration
  82. movi a8, 3 # if source is also aligned,
  83. bnone a3, a8, .Laligned # then use word copy
  84. SSA8( a3) # set shift amount from byte offset
  85. bnez a4, .Lsrcunaligned # that path consumes a7 (16B count) and a8 (mask 3)
  86. movi a2, 0 # return success for len==0
  87. retw
  88. /*
  89. * Destination is unaligned: copy one and/or two leading bytes until dst is word-aligned, then rejoin .Ldstaligned. Lengths below the thresholds tested here are handled entirely by .Lbytecopy.
  90. */
  91. .Ldst1mod2: # dst is only byte aligned
  92. bltui a4, 7, .Lbytecopy # do short copies byte by byte
  93. # copy 1 byte
  94. EX(l8ui, a6, a3, 0, l_fixup)
  95. addi a3, a3, 1
  96. EX(s8i, a6, a5, 0, s_fixup)
  97. addi a5, a5, 1 # dst advances only after the store, so a5 - a2 counts stored bytes in the fixup code
  98. addi a4, a4, -1 # one byte less left to copy
  99. bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then
  100. # return to main algorithm
  101. .Ldst2mod4: # dst 16-bit aligned
  102. # copy 2 bytes
  103. bltui a4, 6, .Lbytecopy # do short copies byte by byte
  104. EX(l8ui, a6, a3, 0, l_fixup) # byte loads: the alignment of src has not been tested yet
  105. EX(l8ui, a7, a3, 1, l_fixup)
  106. addi a3, a3, 2
  107. EX(s8i, a6, a5, 0, s_fixup)
  108. EX(s8i, a7, a5, 1, s_fixup)
  109. addi a5, a5, 2
  110. addi a4, a4, -2 # two bytes less left to copy
  111. j .Ldstaligned # dst is now aligned, return to main algorithm
  112. /*
  113. * Byte by byte copy of the a4 bytes that remain (reached for short copies to an unaligned dst). Returns 0 when the loop completes; a faulting access is redirected to l_fixup/s_fixup through its EX entry.
  114. */
  115. .align 4
  116. .byte 0 # 1 mod 4 alignment for LOOPNEZ
  117. # (0 mod 4 alignment for LBEG)
  118. .Lbytecopy:
  119. #if XCHAL_HAVE_LOOPS
  120. loopnez a4, .Lbytecopydone # hardware loop over a4 bytes, skipped entirely when a4 is 0
  121. #else /* !XCHAL_HAVE_LOOPS */
  122. beqz a4, .Lbytecopydone # nothing to copy
  123. add a7, a3, a4 # a7 = end address for source
  124. #endif /* !XCHAL_HAVE_LOOPS */
  125. .Lnextbyte:
  126. EX(l8ui, a6, a3, 0, l_fixup)
  127. addi a3, a3, 1
  128. EX(s8i, a6, a5, 0, s_fixup)
  129. addi a5, a5, 1 # dst advances only after the store has succeeded
  130. #if !XCHAL_HAVE_LOOPS
  131. bne a3, a7, .Lnextbyte # exact end test: addresses are unsigned, a signed blt would end the loop early when the range crosses 0x80000000
  132. #endif /* !XCHAL_HAVE_LOOPS */
  133. .Lbytecopydone:
  134. movi a2, 0 # return success for len bytes copied
  135. retw
  136. /*
  137. * Destination and source are word-aligned: 16 bytes per loop iteration with 32-bit loads and stores, then an 8/4/2/1-byte tail selected by the low four bits of the length in a4.
  138. */
  139. # copy 16 bytes per iteration for word-aligned dst and word-aligned src
  140. .align 4 # 1 mod 4 alignment for LOOPNEZ
  141. .byte 0 # (0 mod 4 alignment for LBEG)
  142. .Laligned: # on entry a7 = len >> 4, computed at .Ldstaligned
  143. #if XCHAL_HAVE_LOOPS
  144. loopnez a7, .Loop1done
  145. #else /* !XCHAL_HAVE_LOOPS */
  146. beqz a7, .Loop1done # fewer than 16 bytes: only the tail copies run
  147. slli a8, a7, 4 # a8 = number of bytes moved by the 16B loop
  148. add a8, a8, a3 # a8 = end of last 16B source chunk
  149. #endif /* !XCHAL_HAVE_LOOPS */
  150. .Loop1: # a6/a7 alternate as data registers; the count in a7 is no longer needed once the loop is set up
  151. EX(l32i, a6, a3, 0, l_fixup)
  152. EX(l32i, a7, a3, 4, l_fixup)
  153. EX(s32i, a6, a5, 0, s_fixup)
  154. EX(l32i, a6, a3, 8, l_fixup)
  155. EX(s32i, a7, a5, 4, s_fixup)
  156. EX(l32i, a7, a3, 12, l_fixup)
  157. EX(s32i, a6, a5, 8, s_fixup)
  158. addi a3, a3, 16
  159. EX(s32i, a7, a5, 12, s_fixup)
  160. addi a5, a5, 16 # dst advances only after all four stores, so a fault reports this chunk as not copied
  161. #if !XCHAL_HAVE_LOOPS
  162. bne a3, a8, .Loop1 # exact end test: addresses are unsigned, a signed blt would end the loop early when the range crosses 0x80000000
  163. #endif /* !XCHAL_HAVE_LOOPS */
  164. .Loop1done:
  165. bbci.l a4, 3, .L2 # skip unless bit 3 of len is set
  166. # copy 8 bytes
  167. EX(l32i, a6, a3, 0, l_fixup)
  168. EX(l32i, a7, a3, 4, l_fixup)
  169. addi a3, a3, 8
  170. EX(s32i, a6, a5, 0, s_fixup)
  171. EX(s32i, a7, a5, 4, s_fixup)
  172. addi a5, a5, 8
  173. .L2:
  174. bbci.l a4, 2, .L3 # skip unless bit 2 of len is set
  175. # copy 4 bytes
  176. EX(l32i, a6, a3, 0, l_fixup)
  177. addi a3, a3, 4
  178. EX(s32i, a6, a5, 0, s_fixup)
  179. addi a5, a5, 4
  180. .L3:
  181. bbci.l a4, 1, .L4 # skip unless bit 1 of len is set
  182. # copy 2 bytes
  183. EX(l16ui, a6, a3, 0, l_fixup)
  184. addi a3, a3, 2
  185. EX(s16i, a6, a5, 0, s_fixup)
  186. addi a5, a5, 2
  187. .L4:
  188. bbci.l a4, 0, .L5 # skip unless bit 0 of len is set
  189. # copy 1 byte
  190. EX(l8ui, a6, a3, 0, l_fixup)
  191. EX(s8i, a6, a5, 0, s_fixup) # last access: the pointers are not advanced any more
  192. .L5:
  193. movi a2, 0 # return success for len bytes copied
  194. retw
  195. /*
  196. * Destination is aligned, Source is unaligned: read whole aligned source words and merge each neighbouring pair with ALIGN (the shift amount was set by SSA8 before the branch here), 16 bytes per loop iteration, then an 8/4-byte tail done the same way and a 2/1-byte tail done with byte accesses.
  197. */
  198. .align 4
  199. .byte 0 # 1 mod 4 alignment for LOOPNEZ
  200. # (0 mod 4 alignment for LBEG)
  201. .Lsrcunaligned: # on entry a7 = len >> 4 and a8 = 3, both set at .Ldstaligned
  202. # copy 16 bytes per iteration for word-aligned dst and unaligned src
  203. and a10, a3, a8 # save unalignment offset for below
  204. sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware)
  205. EX(l32i, a6, a3, 0, l_fixup) # load first word
  206. #if XCHAL_HAVE_LOOPS
  207. loopnez a7, .Loop2done
  208. #else /* !XCHAL_HAVE_LOOPS */
  209. beqz a7, .Loop2done # fewer than 16 bytes: only the tail copies run
  210. slli a12, a7, 4 # a12 = number of bytes moved by the 16B loop
  211. add a12, a12, a3 # a12 = end of last 16B source chunk
  212. #endif /* !XCHAL_HAVE_LOOPS */
  213. .Loop2: # invariant at loop top: a6 holds the aligned source word at a3 + 0
  214. EX(l32i, a7, a3, 4, l_fixup)
  215. EX(l32i, a8, a3, 8, l_fixup) # a8 is reused as data; the mask value 3 is no longer needed
  216. ALIGN( a6, a6, a7)
  217. EX(s32i, a6, a5, 0, s_fixup)
  218. EX(l32i, a9, a3, 12, l_fixup)
  219. ALIGN( a7, a7, a8)
  220. EX(s32i, a7, a5, 4, s_fixup)
  221. EX(l32i, a6, a3, 16, l_fixup) # first word of the next chunk, restores the loop invariant
  222. ALIGN( a8, a8, a9)
  223. EX(s32i, a8, a5, 8, s_fixup)
  224. addi a3, a3, 16
  225. ALIGN( a9, a9, a6)
  226. EX(s32i, a9, a5, 12, s_fixup)
  227. addi a5, a5, 16 # dst advances only after all four stores, so a fault reports this chunk as not copied
  228. #if !XCHAL_HAVE_LOOPS
  229. bne a3, a12, .Loop2 # exact end test: addresses are unsigned, a signed blt would end the loop early when the range crosses 0x80000000
  230. #endif /* !XCHAL_HAVE_LOOPS */
  231. .Loop2done:
  232. bbci.l a4, 3, .L12 # skip unless bit 3 of len is set
  233. # copy 8 bytes
  234. EX(l32i, a7, a3, 4, l_fixup)
  235. EX(l32i, a8, a3, 8, l_fixup)
  236. ALIGN( a6, a6, a7)
  237. EX(s32i, a6, a5, 0, s_fixup)
  238. addi a3, a3, 8
  239. ALIGN( a7, a7, a8)
  240. EX(s32i, a7, a5, 4, s_fixup)
  241. addi a5, a5, 8
  242. mov a6, a8 # keep the last loaded word in a6 for the 4-byte step
  243. .L12:
  244. bbci.l a4, 2, .L13 # skip unless bit 2 of len is set
  245. # copy 4 bytes
  246. EX(l32i, a7, a3, 4, l_fixup)
  247. addi a3, a3, 4
  248. ALIGN( a6, a6, a7)
  249. EX(s32i, a6, a5, 0, s_fixup)
  250. addi a5, a5, 4
  251. mov a6, a7
  252. .L13:
  253. add a3, a3, a10 # readjust a3 with correct misalignment
  254. bbci.l a4, 1, .L14 # skip unless bit 1 of len is set
  255. # copy 2 bytes
  256. EX(l8ui, a6, a3, 0, l_fixup) # byte accesses from here on: a3 is the true unaligned src again
  257. EX(l8ui, a7, a3, 1, l_fixup)
  258. addi a3, a3, 2
  259. EX(s8i, a6, a5, 0, s_fixup)
  260. EX(s8i, a7, a5, 1, s_fixup)
  261. addi a5, a5, 2
  262. .L14:
  263. bbci.l a4, 0, .L15 # skip unless bit 0 of len is set
  264. # copy 1 byte
  265. EX(l8ui, a6, a3, 0, l_fixup)
  266. EX(s8i, a6, a5, 0, s_fixup)
  267. .L15:
  268. movi a2, 0 # return success for len bytes copied
  269. retw
  270. .section .fixup, "ax"
  271. .align 4
  272. /* a2 = original dst; a5 = current dst; a11= original len
  273. * bytes_copied = a5 - a2
  274. * retval = bytes_not_copied = original len - bytes_copied
  275. * retval = a11 - (a5 - a2)  (a5 is advanced only after a group of stores completes, so bytes already stored in the faulting group are counted as not copied)
  276. *
  277. * Clearing the remaining pieces of kernel memory plugs security
  278. * holes. This functionality is the equivalent of the *_zeroing
  279. * functions that some architectures provide.
  280. */
  281. .Lmemset:
  282. .word memset # literal with the address of memset, fetched by the l32r in l_fixup
  283. s_fixup: # handler named by the EX entries of store instructions
  284. sub a2, a5, a2 /* a2 <-- bytes copied */
  285. sub a2, a11, a2 /* a2 <-- bytes not copied */
  286. retw # a store fault only reports the count, nothing is cleared
  287. l_fixup: # handler named by the EX entries of load instructions
  288. sub a2, a5, a2 /* a2 <-- bytes copied */
  289. sub a2, a11, a2 /* a2 <-- bytes not copied == return value */
  290. /* void *memset(void *s, int c, size_t n); zero the n bytes of dst, starting at the current dst, that were not copied */
  291. mov a6, a5 /* s */
  292. movi a7, 0 /* c */
  293. mov a8, a2 /* n */
  294. l32r a4, .Lmemset # a4 = address of memset
  295. callx4 a4 # call memset with the arguments placed in a6/a7/a8 above
  296. /* Ignore memset return value in a6. */
  297. /* a2 still contains bytes not copied. */
  298. retw