string_64.S 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. /*
  2. * This program is free software; you can redistribute it and/or modify
  3. * it under the terms of the GNU General Public License as published by
  4. * the Free Software Foundation; either version 2 of the License, or
  5. * (at your option) any later version.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU General Public License for more details.
  11. *
  12. * You should have received a copy of the GNU General Public License
  13. * along with this program; if not, write to the Free Software
  14. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  15. *
  16. * Copyright (C) IBM Corporation, 2012
  17. *
  18. * Author: Anton Blanchard <anton@au.ibm.com>
  19. */
  20. #include <asm/ppc_asm.h>
  21. #include <asm/asm-offsets.h>
  /*
   * TOC entry holding the address of ppc64_caches.
   * .Llong_clear loads this entry relative to r2 and then reads the
   * L1 data cache line size fields through the resulting pointer.
   */
  22. .section ".toc","aw"
  23. PPC64_CACHES:
  24. .tc ppc64_caches[TC],ppc64_caches
  25. .section ".text" /* back to the code section for everything below */
  26. /**
  27. * __clear_user: - Zero a block of memory in user space, with less checking.
  28. * @to: Destination address, in user space.
  29. * @n: Number of bytes to zero.
  30. *
  31. * Zero a block of memory in user space. Caller must check
  32. * the specified block with access_ok() before calling this function.
  33. *
  34. * Returns number of bytes that could not be cleared.
  35. * On success, this will be zero.
  36. */
  /*
   * err1: tag the instruction that follows the macro invocation.
   * Places local label 100 at the current location and emits an
   * __ex_table record pairing that address with .Ldo_err1.
   * NOTE(review): __ex_table is presumably the kernel exception table,
   * so a fault at the tagged instruction resumes at .Ldo_err1 - confirm.
   * Used at sites where r3 may have advanced without r4 being updated,
   * .Ldo_err1 therefore reloads r3 from r8 before retrying.
   */
  37. .macro err1
  38. 100:
  39. .section __ex_table,"a"
  40. .align 3 /* 8 byte alignment (.align takes a power of two exponent on powerpc gas) */
  41. .llong 100b,.Ldo_err1 /* record = (tagged address, fixup address) */
  42. .previous /* return to the section that was active before the macro */
  43. .endm
  /*
   * err2: tag the instruction that follows the macro invocation.
   * Places local label 200 at the current location and emits an
   * __ex_table record pairing that address with .Ldo_err2.
   * NOTE(review): __ex_table is presumably the kernel exception table,
   * so a fault at the tagged instruction resumes at .Ldo_err2 - confirm.
   * Used at sites where r3 and r4 are updated together, so the fixup
   * can retry from the current r3 without consulting r8.
   */
  44. .macro err2
  45. 200:
  46. .section __ex_table,"a"
  47. .align 3 /* 8 byte alignment (.align takes a power of two exponent on powerpc gas) */
  48. .llong 200b,.Ldo_err2 /* record = (tagged address, fixup address) */
  49. .previous /* return to the section that was active before the macro */
  50. .endm
  /*
   * err3: tag the instruction that follows the macro invocation.
   * Places local label 300 at the current location and emits an
   * __ex_table record pairing that address with .Ldo_err3.
   * NOTE(review): __ex_table is presumably the kernel exception table,
   * so a fault at the tagged instruction resumes at .Ldo_err3 - confirm.
   * In this file it is used only on the byte store of the retry loop
   * in the fixup path, where .Ldo_err3 returns the count left in r4.
   */
  51. .macro err3
  52. 300:
  53. .section __ex_table,"a"
  54. .align 3 /* 8 byte alignment (.align takes a power of two exponent on powerpc gas) */
  55. .llong 300b,.Ldo_err3 /* record = (tagged address, fixup address) */
  56. .previous /* return to the section that was active before the macro */
  57. .endm
  /*
   * Fault fixup path, named as the target of the __ex_table records
   * emitted by err1, err2 and err3. The three labels fall through
   * into one another.
   *
   * .Ldo_err1: r3 may be ahead of what r4 accounts for, so restart
   *            from the address the main path saved in r8.
   * .Ldo_err2: r3 and r4 agree. Retry the remaining r4 bytes one
   *            byte at a time so the exact failing byte is found.
   * .Ldo_err3: a byte store in the retry loop faulted, or the loop
   *            ran to completion. Return r4 in r3.
   *
   * Every err1 and err2 site in this file appears to be reached with
   * r4 greater than zero, so ctr is not loaded with 0 here.
   */
  58. .Ldo_err1:
  59. mr r3,r8 /* r3 = restart address saved before the faulting sequence */
  60. .Ldo_err2:
  61. mtctr r4 /* loop count = bytes still to clear */
  62. 1:
  63. err3; stb r0,0(r3)
  64. addi r3,r3,1
  65. addi r4,r4,-1 /* one fewer byte outstanding after each successful store */
  66. bdnz 1b
  67. .Ldo_err3:
  68. mr r3,r4 /* return value: bytes not cleared (0 when the loop completed) */
  69. blr
  /*
   * __clear_user entry and head alignment.
   *
   * In:  r3 = to (destination), r4 = n (byte count), following the
   *      argument order given in the header comment.
   * Uses r0 as the zero source for every store, r6 as scratch,
   * r8 as the restart address for .Ldo_err1, and cr7 for the low
   * 4 bits of -to (cr7 is tested again in .Llong_clear).
   *
   * Counts below 32 go straight to .Lshort_clear. Otherwise the
   * destination is brought to 8 byte alignment with at most one byte,
   * one halfword and one word store, and the remaining count selects
   * the short path (below 32), the long path (above 512) or falls
   * through to the medium path.
   *
   * NOTE(review): _GLOBAL_TOC is defined in an included header,
   * presumably it exports the symbol and makes r2 a valid TOC
   * pointer - confirm.
   * NOTE(review): cmpdi is a signed compare, so a count with the top
   * bit set would take the short path. Presumably excluded by the
   * access_ok() check the header comment requires - confirm.
   */
  70. _GLOBAL_TOC(__clear_user)
  71. cmpdi r4,32 /* cr0 = n compared with 32 */
  72. neg r6,r3 /* r6 = -to, low bits give the distance to the next boundary */
  73. li r0,0 /* r0 = 0, the value written by every store below */
  74. blt .Lshort_clear
  75. mr r8,r3 /* restart address for .Ldo_err1 (r4 is still the full count) */
  76. mtocrf 0x01,r6 /* cr7 = low 4 bits of -to */
  77. clrldi r6,r6,(64-3) /* r6 = low 3 bits of -to = bytes needed to reach 8 byte alignment */
  78. /* Get the destination 8 byte aligned */
  79. bf cr7*4+3,1f /* weight 1 bit clear: no single byte store needed */
  80. err1; stb r0,0(r3)
  81. addi r3,r3,1
  82. 1: bf cr7*4+2,2f
  83. err1; sth r0,0(r3)
  84. addi r3,r3,2
  85. 2: bf cr7*4+1,3f
  86. err1; stw r0,0(r3)
  87. addi r3,r3,4
  88. 3: sub r4,r4,r6 /* account for the alignment bytes in one step */
  89. cmpdi r4,32
  90. cmpdi cr1,r4,512 /* second compare goes to cr1 so both results stay available */
  91. blt .Lshort_clear
  92. bgt cr1,.Llong_clear
  /*
   * Medium path: clear the r4 bytes at r3 in 32 byte chunks, four
   * doubleword stores per iteration. Every branch to this label in
   * this file is taken with r4 at least 32, so the count loaded into
   * ctr is at least 1. r3 and r4 are advanced together after each
   * chunk, which is why these stores are tagged with err2.
   * Falls through to .Lshort_clear with fewer than 32 bytes left.
   */
  93. .Lmedium_clear:
  94. srdi r6,r4,5 /* r6 = r4 / 32 = number of whole chunks */
  95. mtctr r6
  96. /* Do 32 byte chunks */
  97. 4:
  98. err2; std r0,0(r3)
  99. err2; std r0,8(r3)
  100. err2; std r0,16(r3)
  101. err2; std r0,24(r3)
  102. addi r3,r3,32
  103. addi r4,r4,-32
  104. bdnz 4b
  /*
   * Short path: finish off the last bytes at r3 (count in r4).
   * One optional 16 byte step, then the low 4 bits of the count are
   * copied into cr7 and each set bit triggers one store of that
   * size (8, 4, 2, 1 bytes). Returns 0 in r3.
   *
   * The 16 byte step updates r3 and r4 together (err2). The tail
   * stores advance r3 only, so they use err1 with r8 as the restart
   * address and the masked r4 as the count from that address.
   */
  105. .Lshort_clear:
  106. /* up to 31 bytes to go */
  107. cmpdi r4,16
  108. blt 6f
  109. err2; std r0,0(r3)
  110. err2; std r0,8(r3)
  111. addi r3,r3,16
  112. addi r4,r4,-16
  113. /* Up to 15 bytes to go */
  114. 6: mr r8,r3 /* restart address for .Ldo_err1 */
  115. clrldi r4,r4,(64-4) /* keep only the low 4 bits of the count (0..15) */
  116. mtocrf 0x01,r4 /* cr7 = those 4 bits */
  117. bf cr7*4+0,7f /* weight 8 bit */
  118. err1; std r0,0(r3)
  119. addi r3,r3,8
  120. 7: bf cr7*4+1,8f /* weight 4 bit */
  121. err1; stw r0,0(r3)
  122. addi r3,r3,4
  123. 8: bf cr7*4+2,9f /* weight 2 bit */
  124. err1; sth r0,0(r3)
  125. addi r3,r3,2
  126. 9: bf cr7*4+3,10f /* weight 1 bit */
  127. err1; stb r0,0(r3)
  128. 10: li r3,0 /* success: zero bytes left uncleared */
  129. blr
  /*
   * Long path, reached only from the entry code with more than 512
   * bytes left and r3 8 byte aligned.
   * Steps: bring r3 to 16 byte alignment, read the L1 data cache
   * line geometry through the ppc64_caches pointer, store doublewords
   * up to the next cache line boundary, zero whole lines with dcbz,
   * then hand the remainder to .Lshort_clear or .Lmedium_clear.
   *
   * r5  = ppc64_caches pointer, later the byte distance to the next
   *       line boundary
   * r7  = log2 of the line size, r9 = line size in bytes
   *       (meanings taken from the offset names - TODO confirm)
   * r10 = 4 * line size for the size check, then line size - 1 as a
   *       mask (assumes the line size is a power of two)
   */
  130. .Llong_clear:
  131. ld r5,PPC64_CACHES@toc(r2) /* r5 = contents of the TOC entry, the address of ppc64_caches */
  132. bf cr7*4+0,11f /* cr7 still holds the low 4 bits of -to from entry, weight 8 bit clear means already 16 byte aligned */
  133. err2; std r0,0(r3)
  134. addi r3,r3,8
  135. addi r4,r4,-8
  136. /* Destination is 16 byte aligned, need to get it cacheline aligned */
  137. 11: lwz r7,DCACHEL1LOGLINESIZE(r5)
  138. lwz r9,DCACHEL1LINESIZE(r5)
  139. /*
  140. * With worst case alignment the long clear loop takes a minimum
  141. * of 1 byte less than 2 cachelines.
  142. */
  /*
   * NOTE(review): the check below compares r4 against 4 times the
   * line size (shift left by 2), which is stricter than the two
   * cachelines mentioned in the comment above - confirm the intent.
   */
  143. sldi r10,r9,2 /* r10 = 4 * line size */
  144. cmpd r4,r10
  145. blt .Lmedium_clear /* not enough bytes left to use the dcbz loop */
  146. neg r6,r3
  147. addi r10,r9,-1 /* r10 = line size - 1, used as a mask from here on */
  148. and. r5,r6,r10 /* r5 = bytes from r3 to the next line boundary, also sets cr0 */
  149. beq 13f /* r3 is already line aligned */
  150. srdi r6,r5,4 /* r5 / 16 iterations of 16 bytes each */
  151. mtctr r6
  152. mr r8,r3 /* restart address for .Ldo_err1 */
  153. 12:
  154. err1; std r0,0(r3)
  155. err1; std r0,8(r3)
  156. addi r3,r3,16
  157. bdnz 12b
  158. sub r4,r4,r5 /* r4 is adjusted only after the loop, hence err1 on the stores above */
  159. 13: srd r6,r4,r7 /* r6 = number of whole cache lines left */
  160. mtctr r6
  161. mr r8,r3 /* restart address for .Ldo_err1 */
  /*
   * dcbz zeroes the whole cache block containing the effective
   * address. An RA field of 0 contributes zero to that address, so
   * the block addressed is the one at r3.
   */
  162. 14:
  163. err1; dcbz r0,r3
  164. add r3,r3,r9 /* advance one line, r4 is not touched inside this loop */
  165. bdnz 14b
  166. and r4,r4,r10 /* r4 = leftover bytes, less than one line */
  167. cmpdi r4,32
  168. blt .Lshort_clear
  169. b .Lmedium_clear