memcmp_64.S 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. /*
  2. * Author: Anton Blanchard <anton@au.ibm.com>
  3. * Copyright 2015 IBM Corporation.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License
  7. * as published by the Free Software Foundation; either version
  8. * 2 of the License, or (at your option) any later version.
  9. */
  10. #include <asm/ppc_asm.h>
  /*
   * Register aliases used by memcmp.
   *
   * off8/off16/off24 are index registers; memcmp's 32-byte path loads them
   * with the constants 8, 16 and 24 and uses them as the offset operand of LD.
   *
   * rA..rH hold the doublewords being compared. rA, rB and rC are also used
   * by the byte-at-a-time loop. rD..rH are r27-r31, which memcmp stores to
   * the stack before using them and reloads before returning.
   */
  11. #define off8 r6
  12. #define off16 r7
  13. #define off24 r8
  14. #define rA r9
  15. #define rB r10
  16. #define rC r11
  17. #define rD r27
  18. #define rE r28
  19. #define rF r29
  20. #define rG r30
  21. #define rH r31
  /*
   * LD is the indexed doubleword load used by the 32-byte path. Little-endian
   * builds use the byte-reversed form (ldbrx) and big-endian builds the plain
   * form (ldx), so in both cases the byte at the lowest address ends up in the
   * most significant byte of the register. That lets the unsigned doubleword
   * compares (cmpld) in memcmp order two buffers the same way a byte-by-byte
   * comparison would.
   */
  22. #ifdef __LITTLE_ENDIAN__
  23. #define LD ldbrx
  24. #else
  25. #define LD ldx
  26. #endif
  /*
   * memcmp: compare r5 bytes starting at r3 with r5 bytes starting at r4.
   *
   * In:   r3, r4 = the two buffers, r5 = length in bytes.
   * Out:  r3 = 0 when no difference is found. When a difference is found by
   *       the byte loop, r3 = (byte from r3 buffer) - (byte from r4 buffer)
   *       for the first differing byte; when it is found by the 32-byte
   *       path, r3 = 1 or -1.
   * Uses: r0, r6-r11, ctr, cr0, cr1, cr6, cr7. The 32-byte path additionally
   *       uses r27-r31, which it saves at -8..-40(r1) and restores on exit.
   *
   * Two strategies:
   *   .Lshort - byte-at-a-time loop, unrolled four times.
   *   .Llong  - 32 bytes per iteration using eight doubleword loads, taken
   *             only when both pointers are 8-byte aligned and the length
   *             is greater than 31. Any remaining 0-31 bytes are finished
   *             by .Lshort.
   */
  27. _GLOBAL(memcmp)
  /* cr1 = (length == 0); tested by the beq below. */
  28. cmpdi cr1,r5,0
  29. /* Use the short loop if both strings are not 8B aligned */
  /*
   * OR the two pointers and mask the low three bits: cr0.eq ends up set only
   * when neither pointer has any of its low three bits set.
   */
  30. or r6,r3,r4
  31. andi. r6,r6,7
  32. /* Use the short loop if length is less than 32B */
  33. cmpdi cr6,r5,31
  /* Zero length: return 0 without touching memory. */
  34. beq cr1,.Lzero
  /* cr0 from the andi. above: some low pointer bit is set -> byte loop. */
  35. bne .Lshort
  /* Aligned and length > 31 -> 32-byte path; otherwise fall into .Lshort. */
  36. bgt cr6,.Llong
  /*
   * Byte loop. ctr = number of bytes left to compare (r5 is non-zero on every
   * path that reaches here). Each step computes rC = rA - rB and leaves via
   * .Lnon_zero on the first difference. The bdz after each of the first three
   * bytes and the bdnz after the fourth each decrement ctr, so the loop stops
   * exactly when the byte count is exhausted.
   */
  37. .Lshort:
  38. mtctr r5
  39. 1: lbz rA,0(r3)
  40. lbz rB,0(r4)
  41. subf. rC,rB,rA
  42. bne .Lnon_zero
  43. bdz .Lzero
  44. lbz rA,1(r3)
  45. lbz rB,1(r4)
  46. subf. rC,rB,rA
  47. bne .Lnon_zero
  48. bdz .Lzero
  49. lbz rA,2(r3)
  50. lbz rB,2(r4)
  51. subf. rC,rB,rA
  52. bne .Lnon_zero
  53. bdz .Lzero
  54. lbz rA,3(r3)
  55. lbz rB,3(r4)
  56. subf. rC,rB,rA
  57. bne .Lnon_zero
  /* Four bytes matched: advance both pointers and continue if ctr != 0. */
  58. addi r3,r3,4
  59. addi r4,r4,4
  60. bdnz 1b
  /* No difference found: return 0. */
  61. .Lzero:
  62. li r3,0
  63. blr
  /* Byte loop found a difference: return the byte difference held in rC. */
  64. .Lnon_zero:
  65. mr r3,rC
  66. blr
  /*
   * 32-byte path. Register pairs (rA,rB), (rC,rD), (rE,rF), (rG,rH) hold the
   * doublewords at offsets 0, 8, 16 and 24 of the current 32-byte chunk; the
   * first register of each pair comes from the r3 buffer and the second from
   * the r4 buffer. Their compare results live in cr0, cr1, cr6 and cr7
   * respectively.
   */
  67. .Llong:
  68. li off8,8
  69. li off16,16
  70. li off24,24
  /*
   * Save r27-r31 (rD..rH) below the stack pointer; r1 itself is not moved.
   * NOTE(review): this relies on the area below r1 being safe to use in the
   * calling environment - confirm against the ABI in use.
   */
  71. std r31,-8(r1)
  72. std r30,-16(r1)
  73. std r29,-24(r1)
  74. std r28,-32(r1)
  75. std r27,-40(r1)
  /* ctr = number of whole 32-byte chunks (at least 1, since r5 > 31 here). */
  76. srdi r0,r5,5
  77. mtctr r0
  /*
   * r5 = leftover byte count (0-31) for .Ltail. The cr0 result written by
   * this andi. is not used; cr0 is overwritten by the cmpld below before it
   * is next tested.
   */
  78. andi. r5,r5,31
  /* Load the first chunk and start its first comparison. */
  79. LD rA,0,r3
  80. LD rB,0,r4
  81. LD rC,off8,r3
  82. LD rD,off8,r4
  83. LD rE,off16,r3
  84. LD rF,off16,r4
  85. LD rG,off24,r3
  86. LD rH,off24,r4
  87. cmpld cr0,rA,rB
  88. addi r3,r3,32
  89. addi r4,r4,32
  /* Only one chunk in total: finish its comparisons in .Lfirst32. */
  90. bdz .Lfirst32
  /*
   * Load the second chunk while comparing the first. Each cmpld is issued on
   * a pair just before that pair is reloaded, and each bne tests a CR field
   * written earlier, so loads, compares and branches overlap. The branches
   * are still taken in address order (offset 0, 8, 16, 24, then the next
   * chunk), so the first difference in memory is the one reported.
   */
  91. LD rA,0,r3
  92. LD rB,0,r4
  93. cmpld cr1,rC,rD
  94. LD rC,off8,r3
  95. LD rD,off8,r4
  96. cmpld cr6,rE,rF
  97. LD rE,off16,r3
  98. LD rF,off16,r4
  99. cmpld cr7,rG,rH
  100. bne cr0,.LcmpAB
  101. LD rG,off24,r3
  102. LD rH,off24,r4
  103. cmpld cr0,rA,rB
  104. bne cr1,.LcmpCD
  105. addi r3,r3,32
  106. addi r4,r4,32
  /* Exactly two chunks: drain the pending comparisons in .Lsecond32. */
  107. bdz .Lsecond32
  108. .balign 16
  /*
   * Steady state. On entry to each iteration the registers hold the chunk
   * loaded last, cr0 holds the result for that chunk's first pair, and
   * cr6/cr7 hold not-yet-tested results for the third and fourth pairs of
   * the chunk before it. The .LcmpXX handlers only read the CR field, so it
   * does not matter that the registers have already been reloaded when a
   * branch is taken.
   */
  109. 1: LD rA,0,r3
  110. LD rB,0,r4
  111. cmpld cr1,rC,rD
  112. bne cr6,.LcmpEF
  113. LD rC,off8,r3
  114. LD rD,off8,r4
  115. cmpld cr6,rE,rF
  116. bne cr7,.LcmpGH
  117. LD rE,off16,r3
  118. LD rF,off16,r4
  119. cmpld cr7,rG,rH
  120. bne cr0,.LcmpAB
  121. LD rG,off24,r3
  122. LD rH,off24,r4
  123. cmpld cr0,rA,rB
  124. bne cr1,.LcmpCD
  125. addi r3,r3,32
  126. addi r4,r4,32
  127. bdnz 1b
  /*
   * All chunks are loaded. Test the two results still pending from the
   * previous chunk (cr6, cr7) and then compare and test all four pairs of
   * the final chunk, again in address order.
   */
  128. .Lsecond32:
  129. cmpld cr1,rC,rD
  130. bne cr6,.LcmpEF
  131. cmpld cr6,rE,rF
  132. bne cr7,.LcmpGH
  133. cmpld cr7,rG,rH
  134. bne cr0,.LcmpAB
  135. bne cr1,.LcmpCD
  136. bne cr6,.LcmpEF
  137. bne cr7,.LcmpGH
  /*
   * Every whole chunk matched. Restore r27-r31, then either return 0 or hand
   * the remaining r5 bytes to the byte loop; r3 and r4 already point just
   * past the last chunk compared.
   */
  138. .Ltail:
  139. ld r31,-8(r1)
  140. ld r30,-16(r1)
  141. ld r29,-24(r1)
  142. ld r28,-32(r1)
  143. ld r27,-40(r1)
  144. cmpdi r5,0
  145. beq .Lzero
  146. b .Lshort
  /*
   * Single-chunk case: cr0 already holds the first pair's result; compare
   * the other three pairs and test all four in address order.
   */
  147. .Lfirst32:
  148. cmpld cr1,rC,rD
  149. cmpld cr6,rE,rF
  150. cmpld cr7,rG,rH
  151. bne cr0,.LcmpAB
  152. bne cr1,.LcmpCD
  153. bne cr6,.LcmpEF
  154. bne cr7,.LcmpGH
  155. b .Ltail
  /*
   * Difference handlers, one per CR field. They are reached only through a
   * bne on that field, so the pair is known to differ: return 1 if the r3
   * side compared greater (unsigned, per cmpld), otherwise -1. All of them
   * leave through .Lout so r27-r31 are restored.
   */
  156. .LcmpAB:
  157. li r3,1
  158. bgt cr0,.Lout
  159. li r3,-1
  160. b .Lout
  161. .LcmpCD:
  162. li r3,1
  163. bgt cr1,.Lout
  164. li r3,-1
  165. b .Lout
  166. .LcmpEF:
  167. li r3,1
  168. bgt cr6,.Lout
  169. li r3,-1
  170. b .Lout
  171. .LcmpGH:
  172. li r3,1
  173. bgt cr7,.Lout
  174. li r3,-1
  /* Common exit for the 32-byte path: restore r27-r31 and return r3. */
  175. .Lout:
  176. ld r31,-8(r1)
  177. ld r30,-16(r1)
  178. ld r29,-24(r1)
  179. ld r28,-32(r1)
  180. ld r27,-40(r1)
  181. blr