memset.S 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. /* memset.S: optimised assembly memset
  2. *
  3. * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  .text
  .p2align 4

###############################################################################
#
# void *memset(void *p, char ch, size_t count)
#
# Entry:   GR8 = p, GR9 = ch, GR10 = count
# Exit:    GR8 = p (no instruction in this routine writes GR8)
# Scratch: GR4  store pointer
#          GR5  bytes still to be written
#          GR6  size of the chunk being attempted
#          GR7  index/step register of the store instructions
#          GR12:GR13  fill byte replicated across a register pair
#          ICC0, ICC1, ICC3, CC5, CC7
#
# - NOTE: must not use any stack. exception detection performs function return
#         to the fixup routine of the caller, aborting the remainder of the set
#         GR4, GR7, GR8, and GR11 must be managed
#
# - __memset_user_error_handler takes GR4 + GR7 as the first byte that was not
#   written, so that sum has to be right at every store below:
#     * alignment stores and the final byte store address @(GR4,GR0), so GR7
#       must be 0 while they can fault;
#     * the update-form stores address @(GR4,GR7) and GR4 is kept one step
#       behind the next target for them.
#
# - ICC3 tracks "bytes left == 0". It is only rewritten by a chunk that was
#   really stored; every such chunk is followed by beqlr, so a stale ICC3 is
#   always "not zero".
#
###############################################################################
        .globl          memset,__memset_end
        .type           memset,@function
memset:
        orcc.p          gr10,gr0,gr5,icc3               ; GR5 = count
        andi            gr9,#0xff,gr9                   ; keep only the fill byte
        or.p            gr8,gr0,gr4                     ; GR4 = address
        beqlr           icc3,#0                         ; count == 0: nothing to do

        # conditionally write a byte to 2b-align the address
        setlos.p        #1,gr6
        andicc          gr4,#1,gr0,icc0
        # GR7 is zeroed before the first store can fault: the alignment stores
        # use GR0 as their index, and the user-space fixup adds GR7 to GR4.
        # Without this GR7 would still hold whatever the caller left in it.
        # (same setlos.p/ckne pairing as in the 4b-align step below)
        setlos.p        #0,gr7
        ckne            icc0,cc7                        ; CC7 = address is odd
        cstb.p          gr9,@(gr4,gr0)          ,cc7,#1
        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
        cadd.p          gr4,gr6,gr4             ,cc7,#1
        beqlr           icc3,#0

        # conditionally write a word to 4b-align the address
        andicc.p        gr4,#2,gr0,icc0
        subicc          gr5,#2,gr0,icc1
        setlos.p        #2,gr6
        ckne            icc0,cc7                        ; CC7 = bit 1 of address set
        slli.p          gr9,#8,gr12                     ; need to double up the pattern
        cknc            icc1,cc5                        ; CC5 = no borrow from GR5 - 2
        or.p            gr9,gr12,gr12
        andcr           cc7,cc5,cc7                     ; store only when both hold

        csth.p          gr12,@(gr4,gr0)         ,cc7,#1
        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
        cadd.p          gr4,gr6,gr4             ,cc7,#1
        beqlr           icc3,#0

        # conditionally write a dword to 8b-align the address
        andicc.p        gr4,#4,gr0,icc0
        subicc          gr5,#4,gr0,icc1
        setlos.p        #4,gr6
        ckne            icc0,cc7                        ; CC7 = bit 2 of address set
        slli.p          gr12,#16,gr13                   ; need to quadruple-up the pattern
        cknc            icc1,cc5                        ; CC5 = no borrow from GR5 - 4
        or.p            gr13,gr12,gr12
        andcr           cc7,cc5,cc7                     ; store only when both hold

        cst.p           gr12,@(gr4,gr0)         ,cc7,#1
        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
        cadd.p          gr4,gr6,gr4             ,cc7,#1
        beqlr           icc3,#0

        or.p            gr12,gr12,gr13                  ; need to octuple-up the pattern

        # the address is now 8b-aligned - loop around writing 64b chunks
        # (when an alignment step was skipped for lack of bytes, fewer than 8
        #  remain and every double-word store below is predicated off)
        setlos          #8,gr7                          ; step for the cstdu stores
        subi.p          gr4,#8,gr4                      ; store with update index does weird stuff
        setlos          #64,gr6

        subicc          gr5,#64,gr0,icc0
0:      cknc            icc0,cc7                        ; CC7 = at least 64 bytes left
        cstdu           gr12,@(gr4,gr7)         ,cc7,#1
        cstdu           gr12,@(gr4,gr7)         ,cc7,#1
        cstdu           gr12,@(gr4,gr7)         ,cc7,#1
        cstdu           gr12,@(gr4,gr7)         ,cc7,#1
        cstdu           gr12,@(gr4,gr7)         ,cc7,#1
        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
        subicc          gr5,#64,gr0,icc0                ; test for another full chunk
        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
        beqlr           icc3,#0
        bnc             icc0,#2,0b

        # now do 32-byte remnant
        subicc.p        gr5,#32,gr0,icc0
        setlos          #32,gr6
        cknc            icc0,cc7                        ; CC7 = at least 32 bytes left
        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
        setlos          #16,gr6                         ; chunk size for the next step
        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
        subicc          gr5,#16,gr0,icc0                ; pre-test the 16-byte remnant
        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
        beqlr           icc3,#0

        # now do 16-byte remnant
        cknc            icc0,cc7                        ; CC7 = at least 16 bytes left
        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
        beqlr           icc3,#0

        # now do 8-byte remnant
        subicc          gr5,#8,gr0,icc1
        cknc            icc1,cc7                        ; CC7 = at least 8 bytes left
        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
        csubcc          gr5,gr7,gr5             ,cc7,#1 ; also set ICC3
        setlos.p        #4,gr7                          ; step for the word store
        beqlr           icc3,#0

        # now do 4-byte remnant
        # GR4 was 8 behind the next target; with the step now 4 it must be 4 behind
        subicc          gr5,#4,gr0,icc0
        addi.p          gr4,#4,gr4
        cknc            icc0,cc7                        ; CC7 = at least 4 bytes left
        cstu.p          gr12,@(gr4,gr7)         ,cc7,#1
        csubcc          gr5,gr7,gr5             ,cc7,#1 ; also set ICC3
        subicc.p        gr5,#2,gr0,icc1                 ; pre-test the 2-byte remnant
        beqlr           icc3,#0

        # now do 2-byte remnant
        # step drops from 4 to 2, so GR4 moves up by 2 to stay one step behind
        setlos          #2,gr7
        addi.p          gr4,#2,gr4
        cknc            icc1,cc7                        ; CC7 = at least 2 bytes left
        csthu.p         gr12,@(gr4,gr7)         ,cc7,#1
        csubcc          gr5,gr7,gr5             ,cc7,#1 ; also set ICC3
        subicc.p        gr5,#1,gr0,icc0                 ; pre-test the 1-byte remnant
        beqlr           icc3,#0

        # now do 1-byte remnant
        # step drops to 0: GR4 is advanced onto the target byte itself
        setlos          #0,gr7
        addi.p          gr4,#2,gr4
        cknc            icc0,cc7                        ; CC7 = one byte left
        cstb.p          gr12,@(gr4,gr0)         ,cc7,#1
        bralr
__memset_end:

        .size           memset, __memset_end-memset
  .globl        __memset_user, __memset_user_error_lr, __memset_user_error_handler
  .type         __memset_user,@function

###############################################################################
#
# long __memset_user(void *p, size_t count)
#
# clear memory in userspace
# - return the number of bytes that could not be cleared (0 on complete success)
#
# Entry: GR8 = p, GR9 = count
# Exit:  GR8 = bytes left uncleared
# Uses:  GR11 to hold the return address, GR9/GR10 as memset arguments, plus
#        whatever memset itself uses
#
# The work is done by calling memset with a zero fill byte. The label
# __memset_user_error_lr marks the return address of that call.
# NOTE(review): the code that diverts a faulting store inside memset to
# __memset_user_error_handler is not in this file; presumably it recognises
# the call by LR being equal to __memset_user_error_lr - confirm there.
#
###############################################################################
__memset_user:
        movsg           lr,gr11                 ; LR is about to be overwritten by the call

        # abuse memset to do the dirty work: count moves to the third argument
        # slot and the fill byte becomes zero (both in one packet, so the OR
        # still sees the incoming GR9)
        or.p            gr9,gr0,gr10            ; GR10 = count
        setlos          #0,gr9                  ; GR9  = 0
        call            memset
__memset_user_error_lr:
        # memset ran to completion: report nothing left over
        jmpl.p          @(gr11,gr0)
        setlos          #0,gr8

        # deal with any exception generated by memset
        #       GR4  - address tracking pointer used by memset
        #       GR7  - step value used by memset (index register for store insns)
        #       GR8  - original start address passed to memset
        #       GR10 - original count passed to memset
__memset_user_error_handler:
        add.p           gr8,gr10,gr8            ; GR8 = start + count (end of the region)
        add             gr4,gr7,gr4             ; GR4 = pointer + step
        jmpl.p          @(gr11,gr0)
        sub             gr8,gr4,gr8             ; we return the amount left uncleared

        .size           __memset_user, .-__memset_user