/* Optimised simple memory checksum
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
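
# For orientation, a rough C equivalent of this routine (an editorial sketch
# assuming a little-endian CPU; not the kernel's generic lib/checksum.c):
#
#	unsigned int do_csum(const unsigned char *buff, int len)
#	{
#		unsigned long long sum = 0;
#
#		/* sum the buffer as little-endian 16-bit words */
#		while (len > 1) {
#			sum += buff[0] | (buff[1] << 8);
#			buff += 2;
#			len -= 2;
#		}
#
#		/* a trailing odd byte is added in the low lane */
#		if (len == 1)
#			sum += buff[0];
#
#		/* fold to 16 bits with end-around carry */
#		while (sum >> 16)
#			sum = (sum & 0xffff) + (sum >> 16);
#
#		return (unsigned int)sum;
#	}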
#include <asm/cache.h>

	.section .text
	.balign	L1_CACHE_BYTES

###############################################################################
#
# unsigned int do_csum(const unsigned char *buff, int len)
#
###############################################################################
	.globl	do_csum
	.type	do_csum,@function
do_csum:
	movm	[d2,d3],(sp)		# save d2/d3; restored by ret below
	mov	d1,d2			# count
	mov	d0,a0			# buff
	mov	a0,a1			# keep the start address for the final
					# odd-alignment test
	clr	d1			# accumulator
	cmp	+0,d2
	ble	do_csum_done		# check for zero length or negative
	# 4-byte align the buffer pointer
	btst	+3,a0
	beq	do_csum_now_4b_aligned

	btst	+1,a0
	beq	do_csum_addr_not_odd
	movbu	(a0),d0			# pick up the leading odd byte
	inc	a0
	asl	+8,d0			# ...into bits 15:8
	add	d0,d1
	add	-1,d2

do_csum_addr_not_odd:
	cmp	+2,d2
	bcs	do_csum_fewer_than_4
	btst	+2,a0
	beq	do_csum_now_4b_aligned
	movhu	(a0+),d0		# pick up an aligned halfword
	add	d0,d1
	add	-2,d2
	cmp	+4,d2
	bcs	do_csum_fewer_than_4
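	#
	# Note: if the buffer started on an odd address, the leading byte was
	# accumulated into bits 15:8 above.  One's-complement addition gives
	# the same sum regardless of which lane each byte occupies, apart from
	# this byte swap, so the result is swapped back at the end; a1 holds
	# the original start address for that test.
	#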
do_csum_now_4b_aligned:
	# we want to checksum as much as we can in chunks of 32 bytes
	cmp	+31,d2
	bls	do_csum_remainder	# 4-byte aligned remainder

	add	-32,d2			# bias the count down by one chunk
	mov	+32,d3

do_csum_loop:
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	add	d0,d1
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	addc	d0,d1
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	addc	+0,d1			# fold in the final carry

	sub	d3,d2
	bcc	do_csum_loop

	add	d3,d2
	beq	do_csum_done
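	#
	# Loop-control note: d2 was biased down by 32 before entry, so
	# "sub d3,d2 / bcc" keeps looping until the subtract borrows, and
	# "add d3,d2" then recovers the true 0-31 byte remainder.  The addc
	# chain repeatedly folds the carry back into the sum (the end-around
	# carry that one's-complement arithmetic requires), with the trailing
	# "addc +0,d1" absorbing the last carry out.
	#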
do_csum_remainder:
	# cut 16-31 bytes down to 0-15
	cmp	+16,d2
	bcs	do_csum_fewer_than_16
	mov	(a0+),d0
	mov	(a0+),e0
	mov	(a0+),e1
	mov	(a0+),e3
	add	d0,d1
	addc	e0,d1
	addc	e1,d1
	addc	e3,d1
	addc	+0,d1
	add	-16,d2
	beq	do_csum_done
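	#
	# The compare ladder below branches into a fall-through sequence: a
	# 12-15 byte remainder executes all three word adds, 8-11 bytes two
	# of them, and 4-7 bytes just the last, each add followed by
	# "addc +0,d1" to fold its carry back in.
	#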
do_csum_fewer_than_16:
	# add the remaining whole words
	cmp	+4,d2
	bcs	do_csum_fewer_than_4
	cmp	+8,d2
	bcs	do_csum_one_word
	cmp	+12,d2
	bcs	do_csum_two_words

	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_two_words:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_one_word:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_fewer_than_4:
	and	+3,d2
	beq	do_csum_done

	xor_cmp	d0,d0,+2,d2		# clear d0 and compare d2 with 2
	bcs	do_csum_fewer_than_2
	movhu	(a0+),d0		# pick up a trailing halfword
	and	+1,d2
	beq	do_csum_add_last_bit

do_csum_fewer_than_2:
	movbu	(a0),d3			# pick up the trailing odd byte
	add	d3,d0

do_csum_add_last_bit:
	add	d0,d1
	addc	+0,d1
do_csum_done:
	# compress the checksum down to 16 bits
	mov	+0xffff0000,d0
	and	d1,d0			# d0 = sum & 0xffff0000
	asl	+16,d1			# d1 = sum << 16
	add	d1,d0			# halfword sum in d0 bits 31:16
	addc	+0xffff,d0		# fold the carry back in
	lsr	+16,d0
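	#
	# Fold note: after "add d1,d0" the top half of d0 holds sum.hi+sum.lo
	# and the bottom half is zero; a carry out means that halfword sum
	# exceeded 0xffff.  "addc +0xffff,d0" turns the carry into +1 on the
	# top half (0xffff + carry rolls over into bit 16) before the shift
	# extracts the result.  E.g. d1 = 0xffff0001: 0xffff0000 + 0x00010000
	# = 0 with carry, then 0 + 0xffff + 1 = 0x10000, giving 0x0001 - the
	# end-around-carry fold of 0xffff + 0x0001.
	#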
	# byte-swap the 16-bit result if the buffer was oddly aligned
	and	+1,a1
	beq	do_csum_not_oddly_aligned
	swaph	d0,d0			# exchange bits 15:8 with 7:0

do_csum_not_oddly_aligned:
	ret	[d2,d3],8		# restore d2/d3 and return

	.size	do_csum, .-do_csum