inet_fragment.c 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. /*
  2. * inet fragments management
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation; either version
  7. * 2 of the License, or (at your option) any later version.
  8. *
  9. * Authors: Pavel Emelyanov <xemul@openvz.org>
  10. * Started as consolidation of ipv4/ip_fragment.c,
  11. * ipv6/reassembly.c and ipv6 nf conntrack reassembly
  12. */
  13. #include <linux/list.h>
  14. #include <linux/spinlock.h>
  15. #include <linux/module.h>
  16. #include <linux/timer.h>
  17. #include <linux/mm.h>
  18. #include <linux/random.h>
  19. #include <linux/skbuff.h>
  20. #include <linux/rtnetlink.h>
  21. #include <linux/slab.h>
  22. #include <net/sock.h>
  23. #include <net/inet_frag.h>
  24. #include <net/inet_ecn.h>
  25. /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
  26. * Value : 0xff if frame should be dropped.
  27. * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
  28. */
  29. const u8 ip_frag_ecn_table[16] = {
  30. /* at least one fragment had CE, and others ECT_0 or ECT_1 */
  31. [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
  32. [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
  33. [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
  34. /* invalid combinations : drop frame */
  35. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
  36. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
  37. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
  38. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
  39. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
  40. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
  41. [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
  42. };
  43. EXPORT_SYMBOL(ip_frag_ecn_table);
  44. int inet_frags_init(struct inet_frags *f)
  45. {
  46. f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
  47. NULL);
  48. if (!f->frags_cachep)
  49. return -ENOMEM;
  50. return 0;
  51. }
  52. EXPORT_SYMBOL(inet_frags_init);
  53. void inet_frags_fini(struct inet_frags *f)
  54. {
  55. /* We must wait that all inet_frag_destroy_rcu() have completed. */
  56. rcu_barrier();
  57. kmem_cache_destroy(f->frags_cachep);
  58. f->frags_cachep = NULL;
  59. }
  60. EXPORT_SYMBOL(inet_frags_fini);
  61. static void inet_frags_free_cb(void *ptr, void *arg)
  62. {
  63. struct inet_frag_queue *fq = ptr;
  64. /* If we can not cancel the timer, it means this frag_queue
  65. * is already disappearing, we have nothing to do.
  66. * Otherwise, we own a refcount until the end of this function.
  67. */
  68. if (!del_timer(&fq->timer))
  69. return;
  70. spin_lock_bh(&fq->lock);
  71. if (!(fq->flags & INET_FRAG_COMPLETE)) {
  72. fq->flags |= INET_FRAG_COMPLETE;
  73. atomic_dec(&fq->refcnt);
  74. }
  75. spin_unlock_bh(&fq->lock);
  76. inet_frag_put(fq);
  77. }
  78. void inet_frags_exit_net(struct netns_frags *nf)
  79. {
  80. nf->high_thresh = 0; /* prevent creation of new frags */
  81. rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
  82. }
  83. EXPORT_SYMBOL(inet_frags_exit_net);
  84. void inet_frag_kill(struct inet_frag_queue *fq)
  85. {
  86. if (del_timer(&fq->timer))
  87. atomic_dec(&fq->refcnt);
  88. if (!(fq->flags & INET_FRAG_COMPLETE)) {
  89. struct netns_frags *nf = fq->net;
  90. fq->flags |= INET_FRAG_COMPLETE;
  91. rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
  92. atomic_dec(&fq->refcnt);
  93. }
  94. }
  95. EXPORT_SYMBOL(inet_frag_kill);
  96. static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
  97. struct sk_buff *skb)
  98. {
  99. if (f->skb_free)
  100. f->skb_free(skb);
  101. kfree_skb(skb);
  102. }
  103. static void inet_frag_destroy_rcu(struct rcu_head *head)
  104. {
  105. struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
  106. rcu);
  107. struct inet_frags *f = q->net->f;
  108. if (f->destructor)
  109. f->destructor(q);
  110. kmem_cache_free(f->frags_cachep, q);
  111. }
  112. void inet_frag_destroy(struct inet_frag_queue *q)
  113. {
  114. struct sk_buff *fp;
  115. struct netns_frags *nf;
  116. unsigned int sum, sum_truesize = 0;
  117. struct inet_frags *f;
  118. WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
  119. WARN_ON(del_timer(&q->timer) != 0);
  120. /* Release all fragment data. */
  121. fp = q->fragments;
  122. nf = q->net;
  123. f = nf->f;
  124. if (fp) {
  125. do {
  126. struct sk_buff *xp = fp->next;
  127. sum_truesize += fp->truesize;
  128. frag_kfree_skb(nf, f, fp);
  129. fp = xp;
  130. } while (fp);
  131. } else {
  132. sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
  133. }
  134. sum = sum_truesize + f->qsize;
  135. call_rcu(&q->rcu, inet_frag_destroy_rcu);
  136. sub_frag_mem_limit(nf, sum);
  137. }
  138. EXPORT_SYMBOL(inet_frag_destroy);
  139. static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
  140. struct inet_frags *f,
  141. void *arg)
  142. {
  143. struct inet_frag_queue *q;
  144. if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
  145. return NULL;
  146. q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
  147. if (!q)
  148. return NULL;
  149. q->net = nf;
  150. f->constructor(q, arg);
  151. add_frag_mem_limit(nf, f->qsize);
  152. setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
  153. spin_lock_init(&q->lock);
  154. atomic_set(&q->refcnt, 3);
  155. return q;
  156. }
  157. static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
  158. void *arg,
  159. struct inet_frag_queue **prev)
  160. {
  161. struct inet_frags *f = nf->f;
  162. struct inet_frag_queue *q;
  163. q = inet_frag_alloc(nf, f, arg);
  164. if (!q) {
  165. *prev = ERR_PTR(-ENOMEM);
  166. return NULL;
  167. }
  168. mod_timer(&q->timer, jiffies + nf->timeout);
  169. *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
  170. &q->node, f->rhash_params);
  171. if (*prev) {
  172. q->flags |= INET_FRAG_COMPLETE;
  173. inet_frag_kill(q);
  174. inet_frag_destroy(q);
  175. return NULL;
  176. }
  177. return q;
  178. }
  179. EXPORT_SYMBOL(inet_frag_create);
  180. /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
  181. struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
  182. {
  183. struct inet_frag_queue *fq = NULL, *prev;
  184. rcu_read_lock();
  185. prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
  186. if (!prev)
  187. fq = inet_frag_create(nf, key, &prev);
  188. if (prev && !IS_ERR(prev)) {
  189. fq = prev;
  190. if (!atomic_inc_not_zero(&fq->refcnt))
  191. fq = NULL;
  192. }
  193. rcu_read_unlock();
  194. return fq;
  195. }
  196. EXPORT_SYMBOL(inet_frag_find);