memmove.S 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. /*
  2. * arch/alpha/lib/memmove.S
  3. *
  4. * Barely optimized memmove routine for Alpha EV5.
  5. *
  6. * This is hand-massaged output from the original memcpy.c. We defer to
  7. * memcpy whenever possible; the backwards copy loops are not unrolled.
  8. */
  9. .set noat /* do not let the assembler use $at implicitly */
  10. .set noreorder /* keep the instruction order exactly as written */
  11. .text
  12. .align 4
  /*
   * memmove: copy n bytes from src to dest; the regions may overlap.
   *
   * Register roles as used by the code below:
   *   $16 = dest, $17 = src, $18 = n (also used as the running byte count)
   *   $0  = return value, set once to the original dest
   *   $4  = dest cursor, $5 = src cursor
   *   $1, $2, $3, $6 = scratch
   *   $26 = return address (every exit is "ret $31,($26),1")
   *
   * Strategy:
   *   - If the regions do not overlap (dest + n <= src or src + n <= dest),
   *     branch to memcpy with the arguments untouched.
   *   - If dest <= src, copy upward from the start ("_up" labels);
   *     otherwise copy downward from the end ("_dn" labels).
   *   - In either direction, when src and dest share the same low 3 address
   *     bits: copy single bytes until the dest cursor is 8-byte aligned,
   *     then whole quadwords, then the remaining 0..7 bytes one at a time.
   *     When they do not share alignment, copy one byte at a time throughout.
   *   - A single byte is written by read-modify-write of the quadword that
   *     contains it: ldq_u / extbl / insbl / mskbl / bis / stq_u.
   *
   * The unop / nop / fnop instructions do not affect the copy; presumably
   * they are padding for EV5 issue-slot alignment (see the file header).
   */
  13. .globl memmove
  14. .ent memmove
  15. memmove:
  16. ldgp $29, 0($27) /* set up the global pointer from the procedure value */
  17. unop
  18. nop
  19. .prologue 1
  20. addq $16,$18,$4 /* $4 = dest + n (one past the end of dest) */
  21. addq $17,$18,$5 /* $5 = src + n (one past the end of src) */
  22. cmpule $4,$17,$1 /* dest + n <= src */
  23. cmpule $5,$16,$2 /* dest >= src + n */
  24. bis $1,$2,$1 /* $1 != 0 if either test says "no overlap" */
  25. mov $16,$0 /* return value = original dest */
  26. xor $16,$17,$2 /* $2 = dest ^ src; low 3 bits are masked out below */
  27. bne $1,memcpy !samegp /* no overlap: plain branch (not a call) into memcpy; $16-$18 and $26 are unchanged */
  28. and $2,7,$2 /* Test for src/dest co-alignment. */
  29. and $16,7,$1 /* $1 = dest & 7, consumed only on the upward path */
  30. cmpule $16,$17,$3 /* $3 = (dest <= src), unsigned */
  31. bne $3,$memmove_up /* dest <= src: copy upward */
  /* Downward copy (dest > src): cursors start one past the end. */
  32. and $4,7,$1 /* $1 = (dest + n) & 7: alignment of the end of dest */
  33. bne $2,$misaligned_dn /* low 3 bits differ: bytes only */
  34. unop
  35. beq $1,$skip_aligned_byte_loop_head_dn /* end already 8-byte aligned */
  /* Head loop, downward: one byte per pass until the dest cursor is aligned. */
  36. $aligned_byte_loop_head_dn:
  37. lda $4,-1($4) /* pre-decrement dest cursor */
  38. lda $5,-1($5) /* pre-decrement src cursor */
  39. unop
  40. ble $18,$egress /* count exhausted: return */
  41. ldq_u $3,0($5) /* quadword containing the source byte */
  42. ldq_u $2,0($4) /* quadword containing the destination byte */
  43. lda $18,-1($18) /* one byte fewer to copy */
  44. extbl $3,$5,$1 /* $1 = source byte, selected by the low bits of $5 */
  45. insbl $1,$4,$1 /* move it to the byte lane selected by $4 */
  46. mskbl $2,$4,$2 /* clear that lane in the old destination quadword */
  47. bis $1,$2,$1 /* merge the new byte into the old quadword */
  48. and $4,7,$6 /* $6 = 0 once the dest cursor is 8-byte aligned */
  49. stq_u $1,0($4) /* write the merged quadword back */
  50. bne $6,$aligned_byte_loop_head_dn
  /*
   * Both cursors are 8-byte aligned here: dest by the test or loop above,
   * src because it shares dest's low 3 bits on this path.
   */
  51. $skip_aligned_byte_loop_head_dn:
  52. lda $18,-8($18) /* bias the count by -8: $18 >= 0 means 8 or more bytes remain */
  53. blt $18,$skip_aligned_word_loop_dn
  /* Quadword loop, downward: 8 bytes per pass. */
  54. $aligned_word_loop_dn:
  55. ldq $1,-8($5) /* load the quadword just below the src cursor */
  56. nop
  57. lda $5,-8($5)
  58. lda $18,-8($18)
  59. stq $1,-8($4) /* store it just below the dest cursor */
  60. nop
  61. lda $4,-8($4)
  62. bge $18,$aligned_word_loop_dn /* at least 8 more bytes remain */
  63. $skip_aligned_word_loop_dn:
  64. lda $18,8($18) /* undo the bias: $18 = bytes still to copy */
  65. bgt $18,$byte_loop_tail_dn /* finish the remainder byte by byte */
  66. unop
  67. ret $31,($26),1 /* nothing left: return, $0 = dest */
  68. .align 4
  69. $misaligned_dn:
  70. nop
  71. fnop
  72. unop
  73. beq $18,$egress /* zero count: nothing to do */
  /* Byte loop, downward: the misaligned case and the tail of the aligned case. */
  74. $byte_loop_tail_dn:
  75. ldq_u $3,-1($5) /* quadword containing the source byte at $5 - 1 */
  76. ldq_u $2,-1($4) /* quadword containing the destination byte at $4 - 1 */
  77. lda $5,-1($5) /* cursors now point at the byte being copied */
  78. lda $4,-1($4)
  79. lda $18,-1($18)
  80. extbl $3,$5,$1 /* extract the source byte */
  81. insbl $1,$4,$1 /* position it for the destination lane */
  82. mskbl $2,$4,$2 /* clear the destination lane */
  83. bis $1,$2,$1
  84. stq_u $1,0($4) /* write the merged quadword back */
  85. bgt $18,$byte_loop_tail_dn
  86. br $egress
  /* Upward copy (dest <= src): cursors start at the first byte. */
  87. $memmove_up:
  88. mov $16,$4 /* dest cursor = dest */
  89. mov $17,$5 /* src cursor = src */
  90. bne $2,$misaligned_up /* low 3 bits differ: bytes only */
  91. beq $1,$skip_aligned_byte_loop_head_up /* $1 = dest & 7: already aligned */
  /* Head loop, upward: one byte per pass until the dest cursor is aligned. */
  92. $aligned_byte_loop_head_up:
  93. unop
  94. ble $18,$egress /* count exhausted: return */
  95. ldq_u $3,0($5) /* quadword containing the source byte */
  96. ldq_u $2,0($4) /* quadword containing the destination byte */
  97. lda $18,-1($18)
  98. extbl $3,$5,$1 /* extract the source byte */
  99. insbl $1,$4,$1 /* position it for the destination lane */
  100. mskbl $2,$4,$2 /* clear the destination lane */
  101. bis $1,$2,$1
  102. lda $5,1($5) /* advance src cursor (its byte is already extracted) */
  103. stq_u $1,0($4) /* write the merged quadword back */
  104. lda $4,1($4) /* advance dest cursor */
  105. and $4,7,$6 /* $6 = 0 once the dest cursor is 8-byte aligned */
  106. bne $6,$aligned_byte_loop_head_up
  /* Both cursors are 8-byte aligned here (same reasoning as the downward path). */
  107. $skip_aligned_byte_loop_head_up:
  108. lda $18,-8($18) /* bias the count by -8: $18 >= 0 means 8 or more bytes remain */
  109. blt $18,$skip_aligned_word_loop_up
  /* Quadword loop, upward: 8 bytes per pass. */
  110. $aligned_word_loop_up:
  111. ldq $1,0($5)
  112. nop
  113. lda $5,8($5)
  114. lda $18,-8($18)
  115. stq $1,0($4)
  116. nop
  117. lda $4,8($4)
  118. bge $18,$aligned_word_loop_up /* at least 8 more bytes remain */
  119. $skip_aligned_word_loop_up:
  120. lda $18,8($18) /* undo the bias: $18 = bytes still to copy */
  121. bgt $18,$byte_loop_tail_up /* finish the remainder byte by byte */
  122. unop
  123. ret $31,($26),1 /* nothing left: return, $0 = dest */
  124. .align 4
  125. $misaligned_up:
  126. nop
  127. fnop
  128. unop
  129. beq $18,$egress /* zero count: nothing to do */
  /* Byte loop, upward: the misaligned case and the tail of the aligned case. */
  130. $byte_loop_tail_up:
  131. ldq_u $3,0($5) /* quadword containing the source byte */
  132. ldq_u $2,0($4) /* quadword containing the destination byte */
  133. lda $18,-1($18)
  134. extbl $3,$5,$1 /* extract the source byte */
  135. insbl $1,$4,$1 /* position it for the destination lane */
  136. mskbl $2,$4,$2 /* clear the destination lane */
  137. bis $1,$2,$1
  138. stq_u $1,0($4) /* write the merged quadword back */
  139. lda $5,1($5)
  140. lda $4,1($4)
  141. nop
  142. bgt $18,$byte_loop_tail_up
  /* Common exit; the upward byte loop falls through to here. */
  143. $egress:
  144. ret $31,($26),1 /* return to caller, $0 = original dest */
  145. nop
  146. nop
  147. nop
  148. .end memmove