strcmp.S 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on glibc cortex strings work originally authored by Linaro
 * and re-licensed under GPLv2 for the Linux kernel. The original code can
 * be found @
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
  /*
   * NOTE(review): ENTRY/ENDPIPROC and CPU_LE/CPU_BE are used by the routine
   * below but are not defined in this file; presumably these two headers
   * supply them - confirm against the tree this file is built in.
   */
#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * compare two strings
 *
 * Parameters:
 *	x0 - const string 1 pointer
 *	x1 - const string 2 pointer
 * Returns:
 *	x0 - an integer less than, equal to, or greater than zero
 *	     if s1 is found, respectively, to be less than, to match,
 *	     or be greater than s2.
 */

/*
 * Byte-replicated 64-bit masks for the word-at-a-time NUL test
 * (X - REP8_01) & ~(X | REP8_7f): the result is non-zero iff some byte
 * of X is zero.  REP8_80 completes the family but is not referenced by
 * the code visible in this file.
 */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/*
 * Register roles.  Every alias names one of x0-x11, all caller-saved
 * under AAPCS64, so the routine neither saves nor restores anything.
 */

/* Parameters and result (result shares x0 with src1). */
src1		.req	x0
src2		.req	x1
result		.req	x0

/* Internal variables. */
data1		.req	x2	/* current dword (or byte) of string 1 */
data1w		.req	w2	/* 32-bit view of data1, for byte loads */
data2		.req	x3	/* current dword (or byte) of string 2 */
data2w		.req	w3	/* 32-bit view of data2, for byte loads */
has_nul		.req	x4	/* non-zero iff data1 contains a NUL byte */
diff		.req	x5	/* data1 ^ data2 */
syndrome	.req	x6	/* diff | has_nul */
tmp1		.req	x7
tmp2		.req	x8
tmp3		.req	x9
zeroones	.req	x10	/* holds REP8_01 for the whole routine */
pos		.req	x11	/* byte countdown / load offset / bit index */
  /*
   * strcmp - compare two NUL-terminated strings, a dword at a time
   *          wherever the pointer alignment allows it.
   *
   * In:       x0 (src1), x1 (src2) - the two string pointers
   * Out:      x0 (result) - negative, zero or positive as string 1 is
   *           less than, equal to or greater than string 2
   * Clobbers: x2-x11 (the scratch aliases declared earlier in this file)
   *           and the NZCV flags
   * Stack:    not used
   *
   * The low three bits of the two pointers select the strategy:
   *   both 8-byte aligned       -> .Lloop_aligned
   *   equal, non-zero offsets   -> .Lmutual_align (round down, mask the
   *                                leading bytes, join the aligned loop)
   *   different offsets         -> .Lmisaligned8 (byte loop, then a
   *                                two-step dword loop)
   * Every dword path that finds a NUL or a difference finishes in
   * .Lcal_cmpresult, which isolates the first significant byte pair.
   */
ENTRY(strcmp)
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7		/* Do the low 3 address bits differ? */
	b.ne	.Lmisaligned8
	ands	tmp1, src1, #7		/* tmp1 = shared offset within a dword. */
	b.ne	.Lmutual_align

	/*
	 * NUL detection works on the principle that (X - 1) & (~X) & 0x80
	 * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	 * can be done in parallel across the entire word.
	 */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found. */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator. */
	orr	syndrome, diff, has_nul
	cbz	syndrome, .Lloop_aligned
	b	.Lcal_cmpresult

.Lmutual_align:
	/*
	 * Sources are mutually aligned, but are not currently at an
	 * alignment boundary. Round down the addresses and then mask off
	 * the bytes that precede the start point.
	 */
	bic	src1, src1, #7
	bic	src2, src2, #7
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits. */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64. */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
	/* Big-endian. Early bytes are at MSB. */
CPU_BE( lsl	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63). */
	/* Little-endian. Early bytes are at LSB. */
CPU_LE( lsr	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63). */

	/*
	 * Force the bytes in front of the real start to 0xff in both
	 * words, so they compare equal and cannot look like a NUL.
	 */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	b	.Lstart_realigned

.Lmisaligned8:
	/*
	 * Get the align offset length to compare per byte first.
	 * After this process, one string's address will be aligned.
	 *
	 * tmp1/tmp2 = 8 - (address & 7), i.e. 1..8 bytes up to the next
	 * boundary of src1/src2.  tmp3 keeps their signed difference for
	 * the realignment arithmetic further down.
	 */
	and	tmp1, src1, #7
	neg	tmp1, tmp1
	add	tmp1, tmp1, #8
	and	tmp2, src2, #7
	neg	tmp2, tmp2
	add	tmp2, tmp2, #8
	subs	tmp3, tmp1, tmp2
	csel	pos, tmp1, tmp2, hi	/* Choose the maximum. */

	/*
	 * Byte loop.  It continues only while bytes remain (pos != 0),
	 * the string 1 byte is not NUL, and the two bytes are equal.
	 */
.Ltinycmp:
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	pos, pos, #1
	/* pos != 0: C = (byte 1 is not NUL); else NZCV = 0b0000. */
	ccmp	data1w, #1, #0, ne
	/* C set: Z = (bytes equal); else NZCV = 0b0000, i.e. "ne". */
	ccmp	data1w, data2w, #0, cs
	b.eq	.Ltinycmp
	cbnz	pos, 1f			/* Stopped early: NUL or mismatch. */
	/*
	 * pos reached zero, so the chain above reported "ne" without
	 * looking at the final byte pair; test that pair explicitly.
	 */
	cmp	data1w, #1
	ccmp	data1w, data2w, #0, cs
	b.eq	.Lstart_align		/* The last bytes are equal too. */
1:
	/* ldrb zero-extended both bytes, so this is their difference. */
	sub	result, data1, data2
	ret

.Lstart_align:
	/* Same test as "ands xzr, src1, #7"; tst matches the entry code. */
	tst	src1, #7
	b.eq	.Lrecal_offset
	/*
	 * src1 is not aligned, so the byte loop ran to src2's boundary
	 * and src1 overshot its own.  tmp3 (= tmp1 - tmp2) is negative
	 * here: adding it steps both pointers back so that src1 lands on
	 * its 8-byte boundary.  The bytes stepped over are compared again,
	 * which is harmless because they are known to be equal and
	 * non-NUL.
	 */
	add	src1, src1, tmp3
	add	src2, src2, tmp3
	/* Load 8 bytes from aligned src1 and non-aligned src2. */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8

	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	bic	has_nul, tmp1, tmp2
	eor	diff, data1, data2	/* Non-zero if differences found. */
	orr	syndrome, diff, has_nul
	cbnz	syndrome, .Lcal_cmpresult
	/* How far the current src2 is past its alignment boundary. */
	and	tmp3, tmp3, #7
.Lrecal_offset:
	/*
	 * On both routes here src1 is aligned and src2 is tmp3 bytes
	 * (1..7) past an 8-byte boundary; pos = -tmp3 is the offset back
	 * to that boundary.
	 */
	neg	pos, tmp3
.Lloopcmp_proc:
	/*
	 * Divide the eight bytes into two parts. First, step src2 back to
	 * an alignment boundary, load eight bytes from that boundary and
	 * compare them with the corresponding bytes from src1.
	 * If all 8 bytes are equal, start the second part's comparison.
	 * Otherwise finish the comparison.
	 * An unaligned load therefore only straddles a boundary after the
	 * bytes in front of that boundary have been shown to be NUL-free.
	 * This special handling is meant to guarantee that all accesses
	 * stay within the thread/task space and never run out of range.
	 */
	ldr	data1, [src1, pos]
	ldr	data2, [src2, pos]
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	bic	has_nul, tmp1, tmp2
	eor	diff, data1, data2	/* Non-zero if differences found. */
	orr	syndrome, diff, has_nul
	cbnz	syndrome, .Lcal_cmpresult

	/* The second part: aligned src1 dword, unaligned src2 dword. */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	bic	has_nul, tmp1, tmp2
	eor	diff, data1, data2	/* Non-zero if differences found. */
	orr	syndrome, diff, has_nul
	cbz	syndrome, .Lloopcmp_proc

.Lcal_cmpresult:
	/*
	 * Reverse the byte order to big-endian, so that the first string
	 * byte is the most significant one and CLZ can locate the first
	 * marked bit.
	 */
CPU_LE( rev	syndrome, syndrome )
CPU_LE( rev	data1, data1 )
CPU_LE( rev	data2, data2 )

	/*
	 * For big-endian we cannot use the trick with the syndrome value
	 * as carry-propagation can corrupt the upper bits if the trailing
	 * bytes in the string contain 0x01.
	 * However, if there is no NUL byte in the dword, we can generate
	 * the result directly. We cannot just subtract the bytes as the
	 * MSB might be significant.
	 */
CPU_BE( cbnz	has_nul, 1f )
CPU_BE( cmp	data1, data2 )
CPU_BE( cset	result, ne )
CPU_BE( cneg	result, result, lo )
CPU_BE( ret )
CPU_BE( 1: )
	/* Re-compute the NUL-byte detection, using a byte-reversed value. */
CPU_BE( rev	tmp3, data1 )
CPU_BE( sub	tmp1, tmp3, zeroones )
CPU_BE( orr	tmp2, tmp3, #REP8_7f )
CPU_BE( bic	has_nul, tmp1, tmp2 )
CPU_BE( rev	has_nul, has_nul )
CPU_BE( orr	syndrome, diff, has_nul )

	clz	pos, syndrome
	/*
	 * The MS-non-zero bit of the syndrome marks either the first bit
	 * that is different, or the top bit of the first zero byte.
	 * Shifting left now will bring the critical information into the
	 * top bits.
	 */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/*
	 * But we need to zero-extend (char is unsigned) the value and then
	 * perform a signed subtraction of the two isolated top bytes.
	 */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
ENDPIPROC(strcmp)