memcpy_32.c 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. #include <linux/string.h>
  2. #include <linux/module.h>
  3. #undef memcpy
  4. #undef memset
  5. __visible void *memcpy(void *to, const void *from, size_t n)
  6. {
  7. #ifdef CONFIG_X86_USE_3DNOW
  8. return __memcpy3d(to, from, n);
  9. #else
  10. return __memcpy(to, from, n);
  11. #endif
  12. }
  13. EXPORT_SYMBOL(memcpy);
  14. __visible void *memset(void *s, int c, size_t count)
  15. {
  16. return __memset(s, c, count);
  17. }
  18. EXPORT_SYMBOL(memset);
  19. __visible void *memmove(void *dest, const void *src, size_t n)
  20. {
  21. int d0,d1,d2,d3,d4,d5;
  22. char *ret = dest;
  23. __asm__ __volatile__(
  24. /* Handle more 16 bytes in loop */
  25. "cmp $0x10, %0\n\t"
  26. "jb 1f\n\t"
  27. /* Decide forward/backward copy mode */
  28. "cmp %2, %1\n\t"
  29. "jb 2f\n\t"
  30. /*
  31. * movs instruction have many startup latency
  32. * so we handle small size by general register.
  33. */
  34. "cmp $680, %0\n\t"
  35. "jb 3f\n\t"
  36. /*
  37. * movs instruction is only good for aligned case.
  38. */
  39. "mov %1, %3\n\t"
  40. "xor %2, %3\n\t"
  41. "and $0xff, %3\n\t"
  42. "jz 4f\n\t"
  43. "3:\n\t"
  44. "sub $0x10, %0\n\t"
  45. /*
  46. * We gobble 16 bytes forward in each loop.
  47. */
  48. "3:\n\t"
  49. "sub $0x10, %0\n\t"
  50. "mov 0*4(%1), %3\n\t"
  51. "mov 1*4(%1), %4\n\t"
  52. "mov %3, 0*4(%2)\n\t"
  53. "mov %4, 1*4(%2)\n\t"
  54. "mov 2*4(%1), %3\n\t"
  55. "mov 3*4(%1), %4\n\t"
  56. "mov %3, 2*4(%2)\n\t"
  57. "mov %4, 3*4(%2)\n\t"
  58. "lea 0x10(%1), %1\n\t"
  59. "lea 0x10(%2), %2\n\t"
  60. "jae 3b\n\t"
  61. "add $0x10, %0\n\t"
  62. "jmp 1f\n\t"
  63. /*
  64. * Handle data forward by movs.
  65. */
  66. ".p2align 4\n\t"
  67. "4:\n\t"
  68. "mov -4(%1, %0), %3\n\t"
  69. "lea -4(%2, %0), %4\n\t"
  70. "shr $2, %0\n\t"
  71. "rep movsl\n\t"
  72. "mov %3, (%4)\n\t"
  73. "jmp 11f\n\t"
  74. /*
  75. * Handle data backward by movs.
  76. */
  77. ".p2align 4\n\t"
  78. "6:\n\t"
  79. "mov (%1), %3\n\t"
  80. "mov %2, %4\n\t"
  81. "lea -4(%1, %0), %1\n\t"
  82. "lea -4(%2, %0), %2\n\t"
  83. "shr $2, %0\n\t"
  84. "std\n\t"
  85. "rep movsl\n\t"
  86. "mov %3,(%4)\n\t"
  87. "cld\n\t"
  88. "jmp 11f\n\t"
  89. /*
  90. * Start to prepare for backward copy.
  91. */
  92. ".p2align 4\n\t"
  93. "2:\n\t"
  94. "cmp $680, %0\n\t"
  95. "jb 5f\n\t"
  96. "mov %1, %3\n\t"
  97. "xor %2, %3\n\t"
  98. "and $0xff, %3\n\t"
  99. "jz 6b\n\t"
  100. /*
  101. * Calculate copy position to tail.
  102. */
  103. "5:\n\t"
  104. "add %0, %1\n\t"
  105. "add %0, %2\n\t"
  106. "sub $0x10, %0\n\t"
  107. /*
  108. * We gobble 16 bytes backward in each loop.
  109. */
  110. "7:\n\t"
  111. "sub $0x10, %0\n\t"
  112. "mov -1*4(%1), %3\n\t"
  113. "mov -2*4(%1), %4\n\t"
  114. "mov %3, -1*4(%2)\n\t"
  115. "mov %4, -2*4(%2)\n\t"
  116. "mov -3*4(%1), %3\n\t"
  117. "mov -4*4(%1), %4\n\t"
  118. "mov %3, -3*4(%2)\n\t"
  119. "mov %4, -4*4(%2)\n\t"
  120. "lea -0x10(%1), %1\n\t"
  121. "lea -0x10(%2), %2\n\t"
  122. "jae 7b\n\t"
  123. /*
  124. * Calculate copy position to head.
  125. */
  126. "add $0x10, %0\n\t"
  127. "sub %0, %1\n\t"
  128. "sub %0, %2\n\t"
  129. /*
  130. * Move data from 8 bytes to 15 bytes.
  131. */
  132. ".p2align 4\n\t"
  133. "1:\n\t"
  134. "cmp $8, %0\n\t"
  135. "jb 8f\n\t"
  136. "mov 0*4(%1), %3\n\t"
  137. "mov 1*4(%1), %4\n\t"
  138. "mov -2*4(%1, %0), %5\n\t"
  139. "mov -1*4(%1, %0), %1\n\t"
  140. "mov %3, 0*4(%2)\n\t"
  141. "mov %4, 1*4(%2)\n\t"
  142. "mov %5, -2*4(%2, %0)\n\t"
  143. "mov %1, -1*4(%2, %0)\n\t"
  144. "jmp 11f\n\t"
  145. /*
  146. * Move data from 4 bytes to 7 bytes.
  147. */
  148. ".p2align 4\n\t"
  149. "8:\n\t"
  150. "cmp $4, %0\n\t"
  151. "jb 9f\n\t"
  152. "mov 0*4(%1), %3\n\t"
  153. "mov -1*4(%1, %0), %4\n\t"
  154. "mov %3, 0*4(%2)\n\t"
  155. "mov %4, -1*4(%2, %0)\n\t"
  156. "jmp 11f\n\t"
  157. /*
  158. * Move data from 2 bytes to 3 bytes.
  159. */
  160. ".p2align 4\n\t"
  161. "9:\n\t"
  162. "cmp $2, %0\n\t"
  163. "jb 10f\n\t"
  164. "movw 0*2(%1), %%dx\n\t"
  165. "movw -1*2(%1, %0), %%bx\n\t"
  166. "movw %%dx, 0*2(%2)\n\t"
  167. "movw %%bx, -1*2(%2, %0)\n\t"
  168. "jmp 11f\n\t"
  169. /*
  170. * Move data for 1 byte.
  171. */
  172. ".p2align 4\n\t"
  173. "10:\n\t"
  174. "cmp $1, %0\n\t"
  175. "jb 11f\n\t"
  176. "movb (%1), %%cl\n\t"
  177. "movb %%cl, (%2)\n\t"
  178. ".p2align 4\n\t"
  179. "11:"
  180. : "=&c" (d0), "=&S" (d1), "=&D" (d2),
  181. "=r" (d3),"=r" (d4), "=r"(d5)
  182. :"0" (n),
  183. "1" (src),
  184. "2" (dest)
  185. :"memory");
  186. return ret;
  187. }
  188. EXPORT_SYMBOL(memmove);