/* memcpy-archs.S */
  /*
   * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   */
  #include <linux/linkage.h>

  /*
   * Endian-dependent helpers used by the copy paths whose source is not
   * 32-bit aligned.  Every helper expands to at most one instruction.
   *
   *   SHIFT_1 / SHIFT_2    opposite-direction shifts of a freshly loaded word
   *                        (asl/lsr on little endian, lsr/asl on big endian)
   *   MERGE_1 / MERGE_2    shifts applied to the first partial loads before
   *                        they are OR-ed together; MERGE_2 expands to
   *                        nothing on little endian
   *   EXTRACT_1/EXTRACT_2  isolate the halfword and the byte stored after
   *                        the word loop; EXTRACT_1 (little endian) and
   *                        EXTRACT_2 (big endian) ignore their AMT argument
   */
  #if defined(__LITTLE_ENDIAN__)

  #define SHIFT_1(DST, SRC, AMT)	asl	DST, SRC, AMT	/* << */
  #define SHIFT_2(DST, SRC, AMT)	lsr	DST, SRC, AMT	/* >> */
  #define MERGE_1(DST, SRC, AMT)	asl	DST, SRC, AMT
  #define MERGE_2(DST, SRC, AMT)
  #define EXTRACT_1(DST, SRC, AMT)	and	DST, SRC, 0xFFFF
  #define EXTRACT_2(DST, SRC, AMT)	lsr	DST, SRC, AMT

  #else	/* big endian */

  #define SHIFT_1(DST, SRC, AMT)	lsr	DST, SRC, AMT	/* >> */
  #define SHIFT_2(DST, SRC, AMT)	asl	DST, SRC, AMT	/* << */
  #define MERGE_1(DST, SRC, AMT)	asl	DST, SRC, AMT	/* << */
  #define MERGE_2(DST, SRC, AMT)	asl	DST, SRC, AMT	/* << */
  #define EXTRACT_1(DST, SRC, AMT)	lsr	DST, SRC, AMT
  #define EXTRACT_2(DST, SRC, AMT)	lsr	DST, SRC, 0x08

  #endif
  /*
   * Building blocks for the bulk loop that runs when source and destination
   * are both 32-bit aligned.  LOADX/STOREX move one unit and post-increment
   * the pointer by the unit size: 8 bytes when CONFIG_ARC_HAS_LL64 is set,
   * 4 bytes otherwise.  The loop is unrolled four times, so one iteration
   * copies 32 (or 16) bytes; ZOLSHFT is log2 of that amount and ZOLAND is
   * the matching remainder mask.
   */
  #ifdef CONFIG_ARC_HAS_LL64
  # define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
  # define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
  # define ZOLSHFT		5
  # define ZOLAND			0x1F
  #else
  # define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
  # define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
  # define ZOLSHFT		4
  # define ZOLAND			0xF
  #endif

  /*
   * void *memcpy(void *dest, const void *src, size_t n)
   *
   * In:      r0 = dest, r1 = src, r2 = n
   * Out:     r0 = dest (r0 is never written; r3 is the running store pointer)
   * Scratch: r3-r11, lp_count, status flags
   *
   * Strategy:
   *   - n == 0 returns immediately; n <= 8 is copied byte by byte.
   *   - Otherwise the destination is first brought to 32-bit alignment with
   *     a byte loop, then one of four paths is taken depending on (src & 3):
   *       0: unrolled aligned loop, then a byte loop for the tail
   *       1/2/3: the source is read with aligned word loads and the bytes
   *              are realigned with shifts, carrying the leftover bytes in
   *              r5 from one word to the next
   *
   * No prefetch/prefetchw hints are issued.  prefetchw claims the target
   * cache line for writing, and a hint placed 32/64 bytes ahead of the store
   * pointer is not guaranteed to stay inside the destination buffer; that is
   * unsafe when the neighbouring memory is used for DMA.  The read-side
   * hints sat right next to the loads they were meant to help, so they are
   * dropped as well.
   */
  ENTRY(memcpy)
	mov.f	0, r2			; only sets the flags: Z when n == 0
  ;;; if size is zero
	jz.d	[blink]
	mov	r3, r0			; (delay slot) r3 = store cursor, r0 stays the return value

  ;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2		; (delay slot) byte count for the small copy

  ;;; Copy bytes until the destination is 32bit aligned
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4		; lp_count = 4 - (dest & 3)
	lpnz	@.Laligndestination	; skipped when dest is already aligned
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
  .Laligndestination:

  ;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned

  ;;; CASE 0: Both source and destination are 32bit aligned
  ;;; Convert len to loop iterations, unfold x4
	lsr.f	lp_count, r2, ZOLSHFT	; (delay slot of the bnz.d above)
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
  .Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND	; bytes left over after the unrolled loop
  .Lsmallchunk:
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
  .Lcopyremainingbytes:

	j	[blink]
  ;;; END CASE 0

  .Lsourceunaligned:
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1		; (delay slot) executed on every path

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]		; (delay slot) first source byte, for cases 1 and 3

  ;;; CASE 1: The source is unaligned, off by 1
	;; One byte was read above to reach 16bit alignment,
	;; two more bytes are read here to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6		; r5 = the 3 bytes read but not yet stored

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 8)

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
  .Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07	; at most 7 bytes left
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
  .Lcopybytewise_1:
	j	[blink]

  .LunalignedOffby2:
  ;;; CASE 2: The source is unaligned, off by 2
	ldh.ab	r5, [r1, 2]		; r5 = the 2 bytes read but not yet stored
	sub	r2, r2, 1		; together with the delay slot above: n -= 2

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
  #ifdef __BIG_ENDIAN__
	asl.nz	r5, r5, 16		; only when the word loop will run
  #endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
  .Lcopy8bytes_2:

  #ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 16		; flags are still those of the lsr.f above
  #endif
	sth.ab	r5, [r3, 2]		; store the 2 carried bytes

	and.f	lp_count, r2, 0x07	; at most 7 bytes left
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
  .Lcopybytewise_2:
	j	[blink]

  .LunalignedOffby3:
  ;;; CASE 3: The source is unaligned, off by 3
  ;;; The single byte needed to reach 32bit alignment is already in r5

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
  #ifdef __BIG_ENDIAN__
	asl.ne	r5, r5, 24		; only when the word loop will run
  #endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
  .Lcopy8bytes_3:

  #ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 24		; flags are still those of the lsr.f above
  #endif
	stb.ab	r5, [r3, 1]		; store the carried byte

	and.f	lp_count, r2, 0x07	; at most 7 bytes left
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
  .Lcopybytewise_3:
	j	[blink]

  END(memcpy)