memscan_64.S 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. /*
  2. * memscan.S: Optimized memscan for Sparc64.
  3. *
  4. * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
  5. * Copyright (C) 1998 David S. Miller (davem@redhat.com)
  6. */
  /* 0x80 in every byte.  __memscan_zero rebuilds this value in a register (sethi/or/sllx/or) and uses it to mask the per-byte top bits. */
  7. #define HI_MAGIC 0x8080808080808080
  /* 0x01 in every byte.  Not referenced by name in the routines visible here: __memscan_zero derives the same value as HI_MAGIC >> 7. */
  8. #define LO_MAGIC 0x0101010101010101
  /* ASI operand of the ldxa loads in __memscan_zero.  NOTE(review): presumably the SPARC V9 primary little-endian ASI -- confirm against the architecture manual. */
  9. #define ASI_PL 0x88
  10. .text
  11. .align 32
  /* All three entry points are exported; memscan and __memscan_generic label the same code. */
  12. .globl __memscan_zero, __memscan_generic
  13. .globl memscan
  /*
   * __memscan_zero: scan a buffer for the first zero byte.
   *
   * In:  %o0 = bufp, %o1 = size (tested as a signed value; size <= 0
   *      returns bufp unchanged).
   * Out: %o0 = address of the first zero byte, or bufp + size when the
   *      end of the buffer is reached first.
   * Scratch: %o2-%o5, %g2, %g3, %g7 and the condition codes.
   *
   * Bytes before the first 8-byte boundary are tested one at a time.
   * After that the buffer is read 8 bytes per iteration and filtered
   * with ((w - 0x01..01) ^ w) & 0x80..80.  A non-zero filter result only
   * marks candidate bytes, so check_bytes re-tests the bytes one by one
   * and resumes the loop if none of them is zero.  The word loads can
   * read up to 7 bytes past bufp + size; the returned pointer is clamped
   * to bufp + size.
   *
   * Invariant: %o0 + %o1 == bufp + size at every point after entry.
   *
   * Delay slots: the instruction following a branch executes even when
   * the branch is taken; after a conditional branch with ",a" it executes
   * only if the branch is taken.
   */
  14. __memscan_zero:
  15. /* %o0 = bufp, %o1 = size */
  16. brlez,pn %o1, szzero    /* size <= 0: return bufp as-is */
  17. andcc %o0, 7, %g0    /* delay slot: test bufp for 8-byte alignment */
  18. be,pt %icc, we_are_aligned    /* already aligned: skip the byte loop */
  19. sethi %hi(HI_MAGIC), %o4    /* delay slot: start building the 0x80 mask (completed at we_are_aligned) */
  20. ldub [%o0], %o5    /* %o5 = first unaligned byte */
  21. 1: subcc %o1, 1, %o1    /* one byte consumed; Z is set when the size reaches 0 */
  22. brz,pn %o5, 10f    /* this byte is zero: found */
  23. add %o0, 1, %o0    /* delay slot: advance the pointer (label 10 undoes this) */
  24. be,pn %xcc, szzero    /* flags from subcc: buffer exhausted, return bufp + size */
  25. andcc %o0, 7, %g0    /* delay slot: is the new pointer 8-byte aligned? */
  26. bne,a,pn %icc, 1b    /* still unaligned: test the next byte */
  27. ldub [%o0], %o5    /* annulled delay slot: load only when looping */
  28. we_are_aligned:
  29. ldxa [%o0] ASI_PL, %o5    /* %o5 = 8 bytes; the byte at [%o0] sits in bits 7:0 (see the address math in check_bytes) */
  30. or %o4, %lo(HI_MAGIC), %o3    /* %o3 = 0x80808080 (low 32 bits of HI_MAGIC) */
  31. sllx %o3, 32, %o4    /* %o4 = same pattern in the upper 32 bits */
  32. or %o4, %o3, %o3    /* %o3 = 0x8080808080808080 */
  33. srlx %o3, 7, %o2    /* %o2 = 0x0101010101010101 */
  34. msloop:
  35. sub %o1, 8, %o1    /* account for the 8 bytes held in %o5 (may go negative) */
  36. add %o0, 8, %o0    /* %o0 = address just past the current word */
  37. sub %o5, %o2, %o4    /* %o4 = w - 0x01..01 */
  38. xor %o4, %o5, %o4    /* %o4 = (w - 0x01..01) ^ w */
  39. andcc %o4, %o3, %g3    /* keep the per-byte top bits; non-zero means some byte is a candidate */
  40. bne,pn %xcc, check_bytes    /* candidate somewhere in the 64-bit word */
  41. srlx %o4, 32, %g3    /* delay slot: %g3 = filter bits for bytes 4-7 */
  42. brgz,a,pt %o1, msloop    /* no candidate and bytes remain: next word */
  43. ldxa [%o0] ASI_PL, %o5    /* annulled delay slot: load only when looping */
  /* Also entered by fall-through when the buffer is exhausted without a candidate; the flags are still those of the andcc above. */
  44. check_bytes:
  45. bne,a,pn %icc, 2f    /* low 32 filter bits non-zero: candidate among bytes 0-3 */
  46. andcc %o5, 0xff, %g0    /* annulled delay slot: test byte 0 */
  47. add %o0, -5, %g2    /* only bytes 4-7 to test: %g2 = address of byte 3, bumped before each test at 3: */
  48. ba,pt %xcc, 3f
  49. srlx %o5, 32, %g7    /* delay slot: %g7 = bytes 4-7 */
  50. 2: srlx %o5, 8, %g7    /* %g7 = bytes 1-7; the byte-0 flags are left intact */
  51. be,pn %icc, 1f    /* byte 0 is zero */
  52. add %o0, -8, %g2    /* delay slot: %g2 = address of byte 0 */
  53. andcc %g7, 0xff, %g0    /* test byte 1 */
  54. srlx %g7, 8, %g7    /* bring the next byte down */
  55. be,pn %icc, 1f    /* byte 1 is zero */
  56. inc %g2    /* delay slot: %g2 = address of byte 1 */
  57. andcc %g7, 0xff, %g0    /* test byte 2 */
  58. srlx %g7, 8, %g7
  59. be,pn %icc, 1f    /* byte 2 is zero */
  60. inc %g2    /* delay slot: %g2 = address of byte 2 */
  61. andcc %g7, 0xff, %g0    /* test byte 3 */
  62. srlx %g7, 8, %g7    /* %g7 = bytes 4-7 */
  63. be,pn %icc, 1f    /* byte 3 is zero */
  64. inc %g2    /* delay slot: %g2 = address of byte 3 */
  65. andcc %g3, %o3, %g0    /* any candidate among bytes 4-7? */
  66. be,a,pn %icc, 2f    /* no: finished with this word */
  67. mov %o0, %g2    /* annulled delay slot: %g2 = address just past this word */
  68. 3: andcc %g7, 0xff, %g0    /* test byte 4 */
  69. srlx %g7, 8, %g7
  70. be,pn %icc, 1f    /* byte 4 is zero */
  71. inc %g2    /* delay slot: %g2 = address of byte 4 */
  72. andcc %g7, 0xff, %g0    /* test byte 5 */
  73. srlx %g7, 8, %g7
  74. be,pn %icc, 1f    /* byte 5 is zero */
  75. inc %g2    /* delay slot: %g2 = address of byte 5 */
  76. andcc %g7, 0xff, %g0    /* test byte 6 */
  77. srlx %g7, 8, %g7
  78. be,pn %icc, 1f    /* byte 6 is zero */
  79. inc %g2    /* delay slot: %g2 = address of byte 6 */
  80. andcc %g7, 0xff, %g0    /* test byte 7 */
  81. srlx %g7, 8, %g7
  82. be,pn %icc, 1f    /* byte 7 is zero */
  83. inc %g2    /* delay slot: %g2 = address of byte 7 */
  84. 2: brgz,a,pt %o1, msloop    /* no zero byte in this word: continue while bytes remain */
  85. ldxa [%o0] ASI_PL, %o5    /* annulled delay slot: load only when looping */
  86. inc %g2    /* buffer exhausted: %g2 is now >= %o0, so the clamp below yields bufp + size */
  87. 1: add %o0, %o1, %o0    /* %o0 = bufp + size (word end plus the signed remaining count) */
  88. cmp %g2, %o0    /* candidate result versus end of buffer */
  89. retl
  90. movle %xcc, %g2, %o0    /* delay slot: return %g2 if it is <= the end (signed), otherwise the end */
  91. 10: retl    /* zero byte found in the unaligned prefix */
  92. sub %o0, 1, %o0    /* delay slot: undo the increment done after the byte was tested */
  93. szzero: retl    /* empty buffer or prefix exhausted: return the current %o0 */
  94. nop
  /*
   * memscan / __memscan_generic: scan a buffer for the first byte equal
   * to c.  Both labels name the same entry point.
   *
   * In:  %o0 = addr, %o1 = c, %o2 = size.
   * Out: %o0 = address of the first matching byte, or addr + size when
   *      no byte matches (addr itself when size is 0).
   * Scratch: %o3-%o5 and the condition codes.
   *
   * Each byte is zero-extended by ldub and compared with %o1 without
   * masking, so a match is only possible when the low 32 bits of %o1
   * hold a value in 0..255.
   *
   * The loop keeps %o3 = addr + size and a negative index %o4 that
   * counts up to 0; %o3 + %o4 addresses the byte to be loaded next.
   */
  95. memscan:
  96. __memscan_generic:
  97. /* %o0 = addr, %o1 = c, %o2 = size */
  98. brz,pn %o2, 3f    /* size == 0: return addr unchanged */
  99. add %o0, %o2, %o3    /* delay slot: %o3 = addr + size (end of buffer) */
  100. ldub [%o0], %o5    /* %o5 = first byte */
  101. sub %g0, %o2, %o4    /* %o4 = -size */
  102. 1:
  103. cmp %o5, %o1    /* current byte versus c */
  104. be,pn %icc, 2f    /* match */
  105. addcc %o4, 1, %o4    /* delay slot: step the index; Z is set once the end is reached */
  106. bne,a,pt %xcc, 1b    /* more bytes left: keep scanning */
  107. ldub [%o3 + %o4], %o5    /* annulled delay slot: load the next byte only when looping */
  108. retl    /* no match: %o4 == 0, so the add at 2: (executed as this retl's delay slot) returns addr + size */
  109. /* The delay slot is the same as the next insn, this is just to make it look more awful */
  110. 2:
  111. add %o3, %o4, %o0    /* match path: %o0 = address just past the matching byte */
  112. retl
  113. sub %o0, 1, %o0    /* delay slot: step back to the matching byte */
  114. 3:
  115. retl    /* size == 0: %o0 still holds addr */
  116. nop