copy_user_64.S

/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>

/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
        GET_THREAD_INFO(%rax)
        movq %rdi,%rcx
        addq %rdx,%rcx
        jc bad_to_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_to_user
        ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
                      "jmp copy_user_generic_string",           \
                      X86_FEATURE_REP_GOOD,                     \
                      "jmp copy_user_enhanced_fast_string",     \
                      X86_FEATURE_ERMS
ENDPROC(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
        GET_THREAD_INFO(%rax)
        movq %rsi,%rcx
        addq %rdx,%rcx
        jc bad_from_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_from_user
        ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
                      "jmp copy_user_generic_string",           \
                      X86_FEATURE_REP_GOOD,                     \
                      "jmp copy_user_enhanced_fast_string",     \
                      X86_FEATURE_ERMS
ENDPROC(_copy_from_user)
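
/*
 * Illustrative note (editor's sketch, not part of the build): the range
 * check performed by both entry points above is roughly the following C,
 * assuming addr_limit is reached through the thread_info pointer that
 * GET_THREAD_INFO loads into %rax (it is an mm_segment_t, shown here as a
 * plain integer for brevity):
 *
 *      unsigned long end = (unsigned long)uptr + len;
 *
 *      if (end < (unsigned long)uptr)                  // addq; jc (wrap-around)
 *              goto bad;
 *      if (end > current_thread_info()->addr_limit)    // cmpq TI_addr_limit; ja
 *              goto bad;
 *      // otherwise fall through to one of the three copy variants
 *
 * ALTERNATIVE_2 assembles the first "jmp copy_user_generic_unrolled" inline
 * and records the other two jumps in .altinstructions; at boot,
 * apply_alternatives() patches the inline jump to the REP_GOOD or ERMS
 * variant if the CPU sets the corresponding feature bit, the ERMS entry
 * (listed last) taking precedence when both bits are set.
 */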

        .section .fixup,"ax"
        /* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
        movl %edx,%ecx
        xorl %eax,%eax
        rep
        stosb
bad_to_user:
        movl %edx,%eax
        ret
ENDPROC(bad_from_user)
        .previous
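
/*
 * Editor's sketch (illustrative only): this fixup implements the
 * copy_from_user() contract for a failed range check, roughly:
 *
 *      memset(to, 0, len);     // rep stosb, %rdi = kernel dest, %ecx = len
 *      return len;             // every byte counts as "not copied"
 *
 * bad_to_user skips the zeroing and only returns the count, since the
 * destination in that case is user memory and is simply left untouched.
 */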

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
        ASM_STAC
        cmpl $8,%edx
        jb 20f          /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz .L_copy_short_string
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
.L_copy_short_string:
        movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
40:     leal (%rdx,%rcx,8),%edx
        jmp 60f
50:     movl %ecx,%edx
60:     jmp copy_user_handle_tail /* ecx is zerorest also */
        .previous

        _ASM_EXTABLE(1b,30b)
        _ASM_EXTABLE(2b,30b)
        _ASM_EXTABLE(3b,30b)
        _ASM_EXTABLE(4b,30b)
        _ASM_EXTABLE(5b,30b)
        _ASM_EXTABLE(6b,30b)
        _ASM_EXTABLE(7b,30b)
        _ASM_EXTABLE(8b,30b)
        _ASM_EXTABLE(9b,30b)
        _ASM_EXTABLE(10b,30b)
        _ASM_EXTABLE(11b,30b)
        _ASM_EXTABLE(12b,30b)
        _ASM_EXTABLE(13b,30b)
        _ASM_EXTABLE(14b,30b)
        _ASM_EXTABLE(15b,30b)
        _ASM_EXTABLE(16b,30b)
        _ASM_EXTABLE(18b,40b)
        _ASM_EXTABLE(19b,40b)
        _ASM_EXTABLE(21b,50b)
        _ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
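
/*
 * Editor's sketch (illustrative only, not part of the build): the control
 * flow above corresponds roughly to the following C, where a fault at any
 * numbered instruction branches to the matching fixup:
 *
 *      unsigned long copy_unrolled_sketch(char *dst, const char *src,
 *                                         unsigned len)
 *      {
 *              unsigned i;
 *
 *              for (i = 0; i < (len >> 6); i++) {      // labels 1-16
 *                      // 64 bytes per iteration, as 8 qword moves via r8-r11
 *                      __builtin_memcpy(dst, src, 64);
 *                      src += 64; dst += 64;
 *              }
 *              len &= 63;
 *              for (i = 0; i < (len >> 3); i++) {      // labels 18-19
 *                      __builtin_memcpy(dst, src, 8);
 *                      src += 8; dst += 8;
 *              }
 *              for (i = 0; i < (len & 7); i++)         // labels 21-22
 *                      *dst++ = *src++;
 *              return 0;                               // 0 = everything copied
 *      }
 *
 * The fixups rebuild the count of bytes still outstanding before jumping to
 * copy_user_handle_tail: 30 adds back the unfinished 64-byte blocks
 * (%ecx << 6), 40 adds back the unfinished qwords (%ecx * 8), and 50 already
 * has the remaining byte count in %ecx.
 */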

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
        ASM_STAC
        cmpl $8,%edx
        jb 2f           /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep
        movsq
2:      movl %edx,%ecx
3:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
11:     leal (%rdx,%rcx,8),%ecx
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,11b)
        _ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
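
/*
 * Editor's sketch (illustrative only): copy_user_generic_string is roughly
 *
 *      if (len >= 8) {
 *              // ALIGN_DESTINATION, then label 1:
 *              copy len / 8 qwords with rep movsq;
 *              len &= 7;
 *      }
 *      copy len bytes with rep movsb;          // labels 2-3
 *      return 0;
 *
 * On a fault, fixup 11 converts the qwords still outstanding back to bytes
 * (%ecx * 8 plus the byte remainder in %edx), and fixup 12 passes the byte
 * count to copy_user_handle_tail, which copies what it still can one byte
 * at a time and returns the number of bytes left uncopied.
 */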

/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
        ASM_STAC
        cmpl $64,%edx
        jb .L_copy_short_string /* less than 64 bytes, avoid the costly 'rep' */
        movl %edx,%ecx
1:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret

        .section .fixup,"ax"
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
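
/*
 * Editor's sketch (illustrative only): with ERMS ("enhanced REP MOVSB"),
 * one rep movsb is the preferred copy once the length is large enough to
 * amortize its startup cost, so the routine is roughly
 *
 *      if (len < 64)
 *              goto .L_copy_short_string;      // reuse the unrolled tail code
 *      copy len bytes with rep movsb;          // label 1
 *      return 0;
 *
 * and the single fixup just forwards the outstanding byte count left in
 * %rcx to copy_user_handle_tail.
 */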

/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
ENTRY(__copy_user_nocache)
        ASM_STAC

        /* If size is less than 8 bytes, go to 4-byte copy */
        cmpl $8,%edx
        jb .L_4b_nocache_copy_entry

        /* If destination is not 8-byte aligned, "cache" copy to align it */
        ALIGN_DESTINATION

        /* Set 4x8-byte copy count and remainder */
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz .L_8b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movnti %r8,(%rdi)
6:      movnti %r9,1*8(%rdi)
7:      movnti %r10,2*8(%rdi)
8:      movnti %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movnti %r8,4*8(%rdi)
14:     movnti %r9,5*8(%rdi)
15:     movnti %r10,6*8(%rdi)
16:     movnti %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz .L_4x8b_nocache_copy_loop

        /* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
        movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz .L_4b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:     movq (%rsi),%r8
21:     movnti %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz .L_8b_nocache_copy_loop

        /* If no byte left, we're done */
.L_4b_nocache_copy_entry:
        andl %edx,%edx
        jz .L_finish_copy

        /* If destination is not 4-byte aligned, go to byte copy: */
        movl %edi,%ecx
        andl $3,%ecx
        jnz .L_1b_cache_copy_entry

        /* Set 4-byte copy count (1 or 0) and remainder */
        movl %edx,%ecx
        andl $3,%edx
        shrl $2,%ecx
        jz .L_1b_cache_copy_entry       /* jump if count is 0 */

        /* Perform 4-byte nocache copy: */
30:     movl (%rsi),%r8d
31:     movnti %r8d,(%rdi)
        leaq 4(%rsi),%rsi
        leaq 4(%rdi),%rdi

        /* If no bytes left, we're done: */
        andl %edx,%edx
        jz .L_finish_copy

        /* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
        movl %edx,%ecx
.L_1b_cache_copy_loop:
40:     movb (%rsi),%al
41:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .L_1b_cache_copy_loop

        /* Finished copying; fence the prior stores */
.L_finish_copy:
        xorl %eax,%eax
        ASM_CLAC
        sfence
        ret

        .section .fixup,"ax"
.L_fixup_4x8b_copy:
        shll $6,%ecx
        addl %ecx,%edx
        jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
        lea (%rdx,%rcx,8),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
        lea (%rdx,%rcx,4),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
        movl %ecx,%edx
.L_fixup_handle_tail:
        sfence
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(20b,.L_fixup_8b_copy)
        _ASM_EXTABLE(21b,.L_fixup_8b_copy)
        _ASM_EXTABLE(30b,.L_fixup_4b_copy)
        _ASM_EXTABLE(31b,.L_fixup_4b_copy)
        _ASM_EXTABLE(40b,.L_fixup_1b_copy)
        _ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
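
/*
 * Editor's sketch (illustrative only): __copy_user_nocache mirrors the
 * unrolled copy above but uses movnti, a non-temporal store, wherever the
 * destination is suitably aligned, so the copied data bypasses the cache:
 *
 *      64-byte blocks:   8 x movnti of a qword each    // labels 1-16
 *      8-byte chunks:    1 x movnti of a qword         // labels 20-21
 *      4-byte chunk:     1 x movnti of a dword         // labels 30-31, only
 *                                                      //  if %rdi is 4-byte
 *                                                      //  aligned
 *      leftover bytes:   ordinary cached movb          // labels 40-41
 *
 * Both the success path and every fixup path issue sfence before leaving,
 * because non-temporal stores are weakly ordered; the fence makes them
 * globally visible before any later stores from the caller.
 */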