fir.h 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. /*
  2. * SpanDSP - a series of DSP components for telephony
  3. *
  4. * fir.h - General telephony FIR routines
  5. *
  6. * Written by Steve Underwood <steveu@coppice.org>
  7. *
  8. * Copyright (C) 2002 Steve Underwood
  9. *
  10. * All rights reserved.
  11. *
  12. * This program is free software; you can redistribute it and/or modify
  13. * it under the terms of the GNU General Public License version 2, as
  14. * published by the Free Software Foundation.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this program; if not, write to the Free Software
  23. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24. */
  25. #if !defined(_FIR_H_)
  26. #define _FIR_H_
  27. /*
  28. Blackfin NOTES & IDEAS:
  29. A simple dot product function is used to implement the filter. This performs
  30. just one MAC/cycle which is inefficient but was easy to implement as a first
  31. pass. The current Blackfin code also uses an unrolled form of the filter
  32. history to avoid 0 length hardware loop issues. This is wasteful of
  33. memory.
  34. Ideas for improvement:
  35. 1/ Rewrite filter for dual MAC inner loop. The issue here is handling
  36. history sample offsets that are 16 bit aligned - the dual MAC needs
  37. 32 bit alignment. There are some good examples in libbfdsp.
  38. 2/ Use the hardware circular buffer facility to halve memory usage.
  39. 3/ Consider using internal memory.
  40. Using less memory might also improve speed as cache misses will be
  41. reduced. A drop in MIPs and memory approaching 50% should be
  42. possible.
  43. The foreground and background filters currently use a total of
  44. about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo
  45. can.
  46. */
  /*
   * Working state for one instance of a 16 bit integer FIR filter
   * (16 bit coefficients, 16 bit samples).
   *
   *   taps     - number of coefficients in the filter
   *   curr_pos - index in history[] where the next input sample is written
   *   coeffs   - caller supplied coefficient table, read only
   *   history  - buffer of past input samples, allocated by fir16_create()
   *              (twice `taps` entries when built for Blackfin)
   */
  struct fir16_state_t {
      int taps;
      int curr_pos;
      const int16_t *coeffs;
      int16_t *history;
  };
  /*
   * Working state for one instance of a 32 bit integer FIR filter:
   * 32 bit integer coefficients applied to 16 bit integer samples.
   *
   *   taps     - number of coefficients in the filter
   *   curr_pos - index in history[] where the next input sample is written
   *   coeffs   - caller supplied coefficient table, read only
   *   history  - buffer of past input samples, allocated by fir32_create()
   */
  struct fir32_state_t {
      int taps;
      int curr_pos;
      const int32_t *coeffs;
      int16_t *history;
  };
  /*
   * Working state for one instance of a floating point FIR filter
   * (floating point coefficients and floating point samples).
   *
   *   taps     - number of coefficients in the filter
   *   curr_pos - current position in history[]; no floating point filter
   *              routine is visible in this header, presumably it is used
   *              the same way as in the integer variants (TODO confirm)
   *   coeffs   - coefficient table, read only
   *   history  - buffer of past samples
   */
  struct fir_float_state_t {
      int taps;
      int curr_pos;
      const float *coeffs;
      float *history;
  };
  78. static inline const int16_t *fir16_create(struct fir16_state_t *fir,
  79. const int16_t *coeffs, int taps)
  80. {
  81. fir->taps = taps;
  82. fir->curr_pos = taps - 1;
  83. fir->coeffs = coeffs;
  84. #if defined(__bfin__)
  85. fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL);
  86. #else
  87. fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
  88. #endif
  89. return fir->history;
  90. }
  91. static inline void fir16_flush(struct fir16_state_t *fir)
  92. {
  93. #if defined(__bfin__)
  94. memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t));
  95. #else
  96. memset(fir->history, 0, fir->taps * sizeof(int16_t));
  97. #endif
  98. }
  99. static inline void fir16_free(struct fir16_state_t *fir)
  100. {
  101. kfree(fir->history);
  102. }
  103. #ifdef __bfin__
  /*
   * Blackfin inline-assembly dot product of two vectors of 16 bit values.
   *
   *   x   - first vector, walked through index register I0
   *   y   - second vector, walked through index register I1
   *   len - number of element pairs to multiply and accumulate
   *
   * The asm clears accumulator A0, preloads the first pair into R0.L/R1.L,
   * then runs a hardware loop of (len - 1) iterations; each iteration
   * accumulates the pair already loaded while fetching the next one.  The
   * last pair is accumulated after the loop and A0 is copied out via R0.
   * Exactly len elements are read from each vector.
   *
   * NOTE(review): with len == 1 the loop count is 0; the Blackfin notes at
   * the top of this file mention "0 length hardware loop issues" - confirm
   * callers never pass a single tap.
   * NOTE(review): A1 appears in the clobber list although the asm text does
   * not reference it.
   *
   * Returns the accumulated sum of products.
   */
  104. static inline int32_t dot_asm(short *x, short *y, int len)
  105. {
  106. int dot;
  107. len--; /* loop count: the final multiply-accumulate is done after the loop */
  108. __asm__("I0 = %1;\n\t"
  109. "I1 = %2;\n\t"
  110. "A0 = 0;\n\t"
  111. "R0.L = W[I0++] || R1.L = W[I1++];\n\t"
  112. "LOOP dot%= LC0 = %3;\n\t"
  113. "LOOP_BEGIN dot%=;\n\t"
  114. "A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
  115. "LOOP_END dot%=;\n\t"
  116. "A0 += R0.L*R1.L (IS);\n\t"
  117. "R0 = A0;\n\t"
  118. "%0 = R0;\n\t"
  119. : "=&d"(dot)
  120. : "a"(x), "a"(y), "a"(len)
  121. : "I0", "I1", "A1", "A0", "R0", "R1"
  122. );
  123. return dot;
  124. }
  125. #endif
  126. static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample)
  127. {
  128. int32_t y;
  129. #if defined(__bfin__)
  130. fir->history[fir->curr_pos] = sample;
  131. fir->history[fir->curr_pos + fir->taps] = sample;
  132. y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos],
  133. fir->taps);
  134. #else
  135. int i;
  136. int offset1;
  137. int offset2;
  138. fir->history[fir->curr_pos] = sample;
  139. offset2 = fir->curr_pos;
  140. offset1 = fir->taps - offset2;
  141. y = 0;
  142. for (i = fir->taps - 1; i >= offset1; i--)
  143. y += fir->coeffs[i] * fir->history[i - offset1];
  144. for (; i >= 0; i--)
  145. y += fir->coeffs[i] * fir->history[i + offset2];
  146. #endif
  147. if (fir->curr_pos <= 0)
  148. fir->curr_pos = fir->taps;
  149. fir->curr_pos--;
  150. return (int16_t) (y >> 15);
  151. }
  152. static inline const int16_t *fir32_create(struct fir32_state_t *fir,
  153. const int32_t *coeffs, int taps)
  154. {
  155. fir->taps = taps;
  156. fir->curr_pos = taps - 1;
  157. fir->coeffs = coeffs;
  158. fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
  159. return fir->history;
  160. }
  161. static inline void fir32_flush(struct fir32_state_t *fir)
  162. {
  163. memset(fir->history, 0, fir->taps * sizeof(int16_t));
  164. }
  165. static inline void fir32_free(struct fir32_state_t *fir)
  166. {
  167. kfree(fir->history);
  168. }
  169. static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample)
  170. {
  171. int i;
  172. int32_t y;
  173. int offset1;
  174. int offset2;
  175. fir->history[fir->curr_pos] = sample;
  176. offset2 = fir->curr_pos;
  177. offset1 = fir->taps - offset2;
  178. y = 0;
  179. for (i = fir->taps - 1; i >= offset1; i--)
  180. y += fir->coeffs[i] * fir->history[i - offset1];
  181. for (; i >= 0; i--)
  182. y += fir->coeffs[i] * fir->history[i + offset2];
  183. if (fir->curr_pos <= 0)
  184. fir->curr_pos = fir->taps;
  185. fir->curr_pos--;
  186. return (int16_t) (y >> 15);
  187. }
  188. #endif