ts_kmp.c 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. /*
  2. * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation; either version
  7. * 2 of the License, or (at your option) any later version.
  8. *
  9. * Authors: Thomas Graf <tgraf@suug.ch>
  10. *
  11. * ==========================================================================
  12. *
  13. * Implements a linear-time string-matching algorithm due to Knuth,
  14. * Morris, and Pratt [1]. Their algorithm avoids the explicit
  15. * computation of the transition function DELTA altogether. Its
  16. * matching time is O(n), for n being length(text), using just an
  17. * auxiliary function PI[1..m], for m being length(pattern),
  18. * precomputed from the pattern in time O(m). The array PI allows
  19. * the transition function DELTA to be computed efficiently
  20. * "on the fly" as needed. Roughly speaking, for any state
  21. * "q" = 0,1,...,m and any character "a" in SIGMA, the value
  22. * PI["q"] contains the information that is independent of "a" and
  23. * is needed to compute DELTA("q", "a") [2]. Since the array PI
  24. * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we
  25. * save a factor of |SIGMA| in the preprocessing time by computing
  26. * PI rather than DELTA.
  27. *
  28. * [1] Cormen, Leiserson, Rivest, Stein
  29. * Introduction to Algorithms, 2nd Edition, MIT Press
  30. * [2] See finite automata theory
  31. */
  32. #include <linux/module.h>
  33. #include <linux/types.h>
  34. #include <linux/string.h>
  35. #include <linux/ctype.h>
  36. #include <linux/textsearch.h>
  37. struct ts_kmp
  38. {
  39. u8 * pattern;
  40. unsigned int pattern_len;
  41. unsigned int prefix_tbl[0];
  42. };
  43. static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state)
  44. {
  45. struct ts_kmp *kmp = ts_config_priv(conf);
  46. unsigned int i, q = 0, text_len, consumed = state->offset;
  47. const u8 *text;
  48. const int icase = conf->flags & TS_IGNORECASE;
  49. for (;;) {
  50. text_len = conf->get_next_block(consumed, &text, conf, state);
  51. if (unlikely(text_len == 0))
  52. break;
  53. for (i = 0; i < text_len; i++) {
  54. while (q > 0 && kmp->pattern[q]
  55. != (icase ? toupper(text[i]) : text[i]))
  56. q = kmp->prefix_tbl[q - 1];
  57. if (kmp->pattern[q]
  58. == (icase ? toupper(text[i]) : text[i]))
  59. q++;
  60. if (unlikely(q == kmp->pattern_len)) {
  61. state->offset = consumed + i + 1;
  62. return state->offset - kmp->pattern_len;
  63. }
  64. }
  65. consumed += text_len;
  66. }
  67. return UINT_MAX;
  68. }
  69. static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len,
  70. unsigned int *prefix_tbl, int flags)
  71. {
  72. unsigned int k, q;
  73. const u8 icase = flags & TS_IGNORECASE;
  74. for (k = 0, q = 1; q < len; q++) {
  75. while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k])
  76. != (icase ? toupper(pattern[q]) : pattern[q]))
  77. k = prefix_tbl[k-1];
  78. if ((icase ? toupper(pattern[k]) : pattern[k])
  79. == (icase ? toupper(pattern[q]) : pattern[q]))
  80. k++;
  81. prefix_tbl[q] = k;
  82. }
  83. }
  84. static struct ts_config *kmp_init(const void *pattern, unsigned int len,
  85. gfp_t gfp_mask, int flags)
  86. {
  87. struct ts_config *conf;
  88. struct ts_kmp *kmp;
  89. int i;
  90. unsigned int prefix_tbl_len = len * sizeof(unsigned int);
  91. size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len;
  92. conf = alloc_ts_config(priv_size, gfp_mask);
  93. if (IS_ERR(conf))
  94. return conf;
  95. conf->flags = flags;
  96. kmp = ts_config_priv(conf);
  97. kmp->pattern_len = len;
  98. compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags);
  99. kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len;
  100. if (flags & TS_IGNORECASE)
  101. for (i = 0; i < len; i++)
  102. kmp->pattern[i] = toupper(((u8 *)pattern)[i]);
  103. else
  104. memcpy(kmp->pattern, pattern, len);
  105. return conf;
  106. }
  107. static void *kmp_get_pattern(struct ts_config *conf)
  108. {
  109. struct ts_kmp *kmp = ts_config_priv(conf);
  110. return kmp->pattern;
  111. }
  112. static unsigned int kmp_get_pattern_len(struct ts_config *conf)
  113. {
  114. struct ts_kmp *kmp = ts_config_priv(conf);
  115. return kmp->pattern_len;
  116. }
  117. static struct ts_ops kmp_ops = {
  118. .name = "kmp",
  119. .find = kmp_find,
  120. .init = kmp_init,
  121. .get_pattern = kmp_get_pattern,
  122. .get_pattern_len = kmp_get_pattern_len,
  123. .owner = THIS_MODULE,
  124. .list = LIST_HEAD_INIT(kmp_ops.list)
  125. };
  126. static int __init init_kmp(void)
  127. {
  128. return textsearch_register(&kmp_ops);
  129. }
  130. static void __exit exit_kmp(void)
  131. {
  132. textsearch_unregister(&kmp_ops);
  133. }
  134. MODULE_LICENSE("GPL");
  135. module_init(init_kmp);
  136. module_exit(exit_kmp);