unaligned-sh4a.h

#ifndef __ASM_SH_UNALIGNED_SH4A_H
#define __ASM_SH_UNALIGNED_SH4A_H

/*
 * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only.
 * 64-bit accesses are handled by combining two of these loads, shifted
 * according to the endianness. Unaligned stores are not supported by
 * the instruction encoding, so these continue to use the packed struct.
 *
 * The same note as with the movli.l/movco.l pair applies here: as long
 * as the load is guaranteed to be inlined, nothing else will hook into
 * r0 and we get the return value for free.
 *
 * NOTE: Because we require the r0 encoding, care should be taken to
 * avoid mixing these heavily with other r0 consumers, such as the atomic
 * ops. Failure to adhere to this can result in the compiler running out
 * of spill registers and blowing up when building at low optimization
 * levels. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34777.
 */
#include <linux/unaligned/packed_struct.h>
#include <linux/types.h>
#include <asm/byteorder.h>

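/*
 * movua.l covers 32-bit loads only, so an unaligned 16-bit read is
 * assembled from individual byte loads in the appropriate order.
 */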
static inline u16 sh4a_get_unaligned_cpu16(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
        return p[0] | p[1] << 8;
#else
        return p[0] << 8 | p[1];
#endif
}

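/*
 * The "z" constraint pins the output to r0, the only destination register
 * movua.l can encode (see the NOTE at the top of this file).
 */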
static __always_inline u32 sh4a_get_unaligned_cpu32(const u8 *p)
{
        unsigned long unaligned;

        __asm__ __volatile__ (
                "movua.l @%1, %0\n\t"
                 : "=z" (unaligned)
                 : "r" (p)
        );

        return unaligned;
}

/*
 * Even though movua.l supports auto-increment on the read side, it can
 * only store to r0 due to instruction encoding constraints, so just let
 * the compiler sort it out on its own.
 */
static inline u64 sh4a_get_unaligned_cpu64(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
        return (u64)sh4a_get_unaligned_cpu32(p + 4) << 32 |
                    sh4a_get_unaligned_cpu32(p);
#else
        return (u64)sh4a_get_unaligned_cpu32(p) << 32 |
                    sh4a_get_unaligned_cpu32(p + 4);
#endif
}

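/*
 * The accessors below read the raw bytes with the CPU-order helpers above
 * and then byte-swap as needed for the requested endianness.
 */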
static inline u16 get_unaligned_le16(const void *p)
{
        return le16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_le32(const void *p)
{
        return le32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_le64(const void *p)
{
        return le64_to_cpu(sh4a_get_unaligned_cpu64(p));
}

static inline u16 get_unaligned_be16(const void *p)
{
        return be16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_be32(const void *p)
{
        return be32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_be64(const void *p)
{
        return be64_to_cpu(sh4a_get_unaligned_cpu64(p));
}

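/*
 * Unaligned stores have no special instruction support (see the comment
 * at the top), so the non-native-endian put routines write the value one
 * byte at a time.
 */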
static inline void nonnative_put_le16(u16 val, u8 *p)
{
        *p++ = val;
        *p++ = val >> 8;
}

static inline void nonnative_put_le32(u32 val, u8 *p)
{
        nonnative_put_le16(val, p);
        nonnative_put_le16(val >> 16, p + 2);
}

static inline void nonnative_put_le64(u64 val, u8 *p)
{
        nonnative_put_le32(val, p);
        nonnative_put_le32(val >> 32, p + 4);
}

static inline void nonnative_put_be16(u16 val, u8 *p)
{
        *p++ = val >> 8;
        *p++ = val;
}

static inline void nonnative_put_be32(u32 val, u8 *p)
{
        nonnative_put_be16(val >> 16, p);
        nonnative_put_be16(val, p + 2);
}

static inline void nonnative_put_be64(u64 val, u8 *p)
{
        nonnative_put_be32(val >> 32, p);
        nonnative_put_be32(val, p + 4);
}

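/*
 * Native-endian stores go through the packed-struct helpers from
 * <linux/unaligned/packed_struct.h>; non-native-endian stores fall back
 * to the byte-at-a-time routines above.
 */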
static inline void put_unaligned_le16(u16 val, void *p)
{
#ifdef __LITTLE_ENDIAN
        __put_unaligned_cpu16(val, p);
#else
        nonnative_put_le16(val, p);
#endif
}

static inline void put_unaligned_le32(u32 val, void *p)
{
#ifdef __LITTLE_ENDIAN
        __put_unaligned_cpu32(val, p);
#else
        nonnative_put_le32(val, p);
#endif
}

static inline void put_unaligned_le64(u64 val, void *p)
{
#ifdef __LITTLE_ENDIAN
        __put_unaligned_cpu64(val, p);
#else
        nonnative_put_le64(val, p);
#endif
}

static inline void put_unaligned_be16(u16 val, void *p)
{
#ifdef __BIG_ENDIAN
        __put_unaligned_cpu16(val, p);
#else
        nonnative_put_be16(val, p);
#endif
}

static inline void put_unaligned_be32(u32 val, void *p)
{
#ifdef __BIG_ENDIAN
        __put_unaligned_cpu32(val, p);
#else
        nonnative_put_be32(val, p);
#endif
}

static inline void put_unaligned_be64(u64 val, void *p)
{
#ifdef __BIG_ENDIAN
        __put_unaligned_cpu64(val, p);
#else
        nonnative_put_be64(val, p);
#endif
}

/*
 * Although it's not obvious from the naming, the generic le/be wrappers
 * below use the __get/put_xxx prefixing yet ultimately call back into the
 * non-prefixed get/put_xxx variants provided above.
 */
#include <linux/unaligned/generic.h>

#ifdef __LITTLE_ENDIAN
# define get_unaligned __get_unaligned_le
# define put_unaligned __put_unaligned_le
#else
# define get_unaligned __get_unaligned_be
# define put_unaligned __put_unaligned_be
#endif
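
/*
 * Usage sketch (illustrative only, not part of the original header):
 * callers are expected to go through the generic accessors rather than
 * the sh4a_* helpers directly. "buf" below is a hypothetical, possibly
 * misaligned byte buffer.
 *
 *      u32 len = get_unaligned_le32(buf);      (movua.l, plus a swap on BE)
 *      u16 tag = get_unaligned_be16(buf + 4);  (byte loads and a shift)
 *      put_unaligned_le32(len + 1, buf);       (packed struct or byte stores)
 */
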
#endif /* __ASM_SH_UNALIGNED_SH4A_H */