memcpy.S 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. /* memcpy.S: optimised assembly memcpy
  2. *
  3. * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. .text
  12. .p2align 4 ; align the memcpy entry point to 2^4 = 16 bytes
  13. ###############################################################################
  14. # Copy count bytes from "from" to "to" in the widest unit (1, 2, 4, 8 or 16 bytes) that to, from and count are all multiples of
  15. # void *memcpy(void *to, const char *from, size_t count)
  16. # - in: GR8 = to, GR9 = from, GR10 = count. GR8 is never written here, so it still holds "to" on return
  17. # - NOTE: must not use any stack. exception detection performs function return
  18. # to caller's fixup routine, aborting the remainder of the copy
  19. # - writes GR3 (running dest), GR4-GR7 (data), GR9, GR10, GR11 (step), ICC0, ICC1, ICC3. A ".p" suffix packs an insn with the one after it
  20. ###############################################################################
  21. .globl memcpy,__memcpy_end ; __memcpy_end is exported too - presumably so fault handling outside this file can test for a PC inside memcpy (TODO confirm)
  22. .type memcpy,@function
  23. memcpy:
  24. or.p gr8,gr9,gr4 ; gr4 = to | from
  25. orcc gr10,gr0,gr0,icc3 ; set icc3 from count; the destination is gr0, so only the flags are kept
  26. or.p gr10,gr4,gr4 ; gr4 = to | from | count: its low bits give the common alignment
  27. beqlr icc3,#0 ; count == 0: return through LR with nothing copied
  28. # optimise based on best common alignment for to, from & count
  29. andicc.p gr4,#0x0f,gr0,icc0 ; icc0: are all three multiples of 16?
  30. setlos #8,gr11 ; step = 8, used as-is by memcpy_8 and memcpy_16
  31. andicc.p gr4,#0x07,gr0,icc1 ; icc1: multiples of 8? (packed with the branch on the previous test, hence icc0/icc1 alternate)
  32. beq icc0,#0,memcpy_16
  33. andicc.p gr4,#0x03,gr0,icc0 ; icc0 reused: multiples of 4?
  34. beq icc1,#0,memcpy_8
  35. andicc.p gr4,#0x01,gr0,icc1 ; icc1 reused: multiples of 2?
  36. beq icc0,#0,memcpy_4
  37. setlos.p #1,gr11 ; step = 1 for the byte loop; memcpy_2 sets its own step if the branch is taken
  38. beq icc1,#0,memcpy_2
  39. # do byte by byte copy
  40. sub.p gr8,gr11,gr3 ; gr3 = to - step, because the update-form stores below address gr3+gr11
  41. sub gr9,gr11,gr9 ; from -= step, the same pre-bias for the update-form loads
  42. 0: ldubu.p @(gr9,gr11),gr4 ; load one byte from gr9+gr11 and advance gr9 to that address
  43. subicc gr10,#1,gr10,icc0 ; count -= 1, flags to icc0
  44. stbu.p gr4,@(gr3,gr11) ; store the byte to gr3+gr11 and advance gr3 to that address
  45. bne icc0,#2,0b ; loop until count reaches 0 (count was non-zero on entry)
  46. bralr ; return through LR
  47. # do halfword by halfword copy
  48. memcpy_2:
  49. setlos #2,gr11 ; step = 2
  50. sub.p gr8,gr11,gr3 ; gr3 = to - step
  51. sub gr9,gr11,gr9 ; from -= step
  52. 0: lduhu.p @(gr9,gr11),gr4 ; load one halfword and advance gr9
  53. subicc gr10,#2,gr10,icc0 ; count -= 2; count is a multiple of 2 here, so it hits 0 exactly
  54. sthu.p gr4,@(gr3,gr11) ; store the halfword and advance gr3
  55. bne icc0,#2,0b
  56. bralr ; return through LR
  57. # do word by word copy
  58. memcpy_4:
  59. setlos #4,gr11 ; step = 4
  60. sub.p gr8,gr11,gr3 ; gr3 = to - step
  61. sub gr9,gr11,gr9 ; from -= step
  62. 0: ldu.p @(gr9,gr11),gr4 ; load one word and advance gr9
  63. subicc gr10,#4,gr10,icc0 ; count -= 4; count is a multiple of 4 here
  64. stu.p gr4,@(gr3,gr11) ; store the word and advance gr3
  65. bne icc0,#2,0b
  66. bralr ; return through LR
  67. # do double-word by double-word copy
  68. memcpy_8:
  69. sub.p gr8,gr11,gr3 ; gr11 is still 8 from the setlos in the alignment tests; gr3 = to - 8
  70. sub gr9,gr11,gr9 ; from -= 8
  71. 0: lddu.p @(gr9,gr11),gr4 ; load 8 bytes into the pair gr4:gr5 and advance gr9
  72. subicc gr10,#8,gr10,icc0 ; count -= 8; count is a multiple of 8 here
  73. stdu.p gr4,@(gr3,gr11) ; store gr4:gr5 and advance gr3
  74. bne icc0,#2,0b
  75. bralr ; return through LR
  76. # do quad-word by quad-word copy
  77. memcpy_16:
  78. sub.p gr8,gr11,gr3 ; gr11 is still 8: each 16-byte iteration is two 8-byte transfers; gr3 = to - 8
  79. sub gr9,gr11,gr9 ; from -= 8
  80. 0: lddu @(gr9,gr11),gr4 ; first 8 bytes into gr4:gr5, gr9 advanced
  81. lddu.p @(gr9,gr11),gr6 ; second 8 bytes into gr6:gr7, gr9 advanced again
  82. subicc gr10,#16,gr10,icc0 ; count -= 16; count is a multiple of 16 here
  83. stdu gr4,@(gr3,gr11) ; store the first 8 bytes, gr3 advanced
  84. stdu.p gr6,@(gr3,gr11) ; store the second 8 bytes, gr3 advanced again
  85. bne icc0,#2,0b
  86. bralr ; return through LR
  87. __memcpy_end:
  88. .size memcpy, __memcpy_end-memcpy
  89. ###############################################################################
  90. #
  91. # copy to/from userspace
  92. # - return the number of bytes that could not be copied (0 on complete success)
  93. # - wraps memcpy; 8 bytes of stack hold dst+count at sp+0 and the caller's return address at sp+4
  94. # long __memcpy_user(void *dst, const void *src, size_t count)
  95. # - in: GR8 = dst, GR9 = src, GR10 = count, handed to memcpy unchanged; result returned in GR8
  96. ###############################################################################
  97. .globl __memcpy_user, __memcpy_user_error_lr, __memcpy_user_error_handler
  98. .type __memcpy_user,@function
  99. __memcpy_user:
  100. movsg lr,gr7 ; gr7 = our return address, saved before the call below replaces LR
  101. subi.p sp,#8,sp ; reserve 8 bytes of stack
  102. add gr8,gr10,gr6 ; calculate expected end address
  103. stdi gr6,@(sp,#0) ; store the pair: [sp+0] = gr6 (dst+count), [sp+4] = gr7 (return address)
  104. # abuse memcpy to do the dirty work
  105. call memcpy
  106. __memcpy_user_error_lr: ; memcpy returns here; exported - presumably so fault handling outside this file can recognise this call site (TODO confirm)
  107. ldi.p @(sp,#4),gr7 ; success path: reload the saved return address
  108. setlos #0,gr8 ; return 0: nothing was left uncopied
  109. jmpl.p @(gr7,gr0) ; return to our caller ...
  110. addi sp,#8,sp ; ... releasing the stack slot in the same packed bundle
  111. # deal with any exception generated by memcpy (not reached by fall-through; presumably entered from fault fixup code outside this file - confirm)
  112. # GR3 - memcpy's current dest address (memcpy never writes GR8, which still holds dst)
  113. # GR11 - memcpy's step value (index register for store insns)
  114. __memcpy_user_error_handler:
  115. lddi.p @(sp,#0),gr4 ; load GR4 with dst+count, GR5 with ret addr
  116. add gr11,gr3,gr7 ; gr7 = gr3 + step: the next dest address memcpy would have stored to (assumes a faulting access leaves its base register un-updated - TODO confirm)
  117. sub.p gr4,gr7,gr8 ; return value = (dst+count) - gr7 = number of bytes not copied
  118. addi sp,#8,sp ; release the stack slot
  119. jmpl @(gr5,gr0) ; return to __memcpy_user's caller
  120. .size __memcpy_user, .-__memcpy_user