nf_conntrack_ecache.c

/* Event cache for netfilter. */
/*
 * (C) 2005 Harald Welte <laforge@gnumonks.org>
 * (C) 2005 Patrick McHardy <kaber@trash.net>
 * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>

static DEFINE_MUTEX(nf_ct_ecache_mutex);

#define ECACHE_RETRY_WAIT (HZ/10)

enum retry_state {
        STATE_CONGESTED,
        STATE_RESTART,
        STATE_DONE,
};
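
/* Walk one CPU's dying list, delivering the pending IPCT_DESTROY event
 * for each entry that is not yet flagged as dying.  Up to
 * ARRAY_SIZE(refs) conntrack references are collected under pcpu->lock
 * and only released after the lock is dropped, because nf_ct_put() may
 * free the entry.
 */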
static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
{
        struct nf_conn *refs[16];
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
        unsigned int evicted = 0;
        enum retry_state ret = STATE_DONE;

        spin_lock(&pcpu->lock);

        hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);

                if (nf_ct_is_dying(ct))
                        continue;

                if (nf_conntrack_event(IPCT_DESTROY, ct)) {
                        ret = STATE_CONGESTED;
                        break;
                }

                /* we've got the event delivered, now it's dying */
                set_bit(IPS_DYING_BIT, &ct->status);
                refs[evicted] = ct;
                if (++evicted >= ARRAY_SIZE(refs)) {
                        ret = STATE_RESTART;
                        break;
                }
        }

        spin_unlock(&pcpu->lock);

        /* can't _put while holding lock */
        while (evicted)
                nf_ct_put(refs[--evicted]);

        return ret;
}
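
/* Deferred work that retries event delivery for conntracks stuck on the
 * per-CPU dying lists.  The returned state maps onto a reschedule delay:
 * STATE_CONGESTED backs off for ECACHE_RETRY_WAIT, STATE_RESTART
 * reschedules immediately (the 16-entry batch filled up), and STATE_DONE
 * on every CPU leaves delay at -1, letting the work go idle.
 */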
static void ecache_work(struct work_struct *work)
{
        struct netns_ct *ctnet =
                container_of(work, struct netns_ct, ecache_dwork.work);
        int cpu, delay = -1;
        struct ct_pcpu *pcpu;

        local_bh_disable();

        for_each_possible_cpu(cpu) {
                enum retry_state ret;

                pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu);

                ret = ecache_work_evict_list(pcpu);

                switch (ret) {
                case STATE_CONGESTED:
                        delay = ECACHE_RETRY_WAIT;
                        goto out;
                case STATE_RESTART:
                        delay = 0;
                        break;
                case STATE_DONE:
                        break;
                }
        }

out:
        local_bh_enable();

        ctnet->ecache_dwork_pending = delay > 0;
        if (delay >= 0)
                schedule_delayed_work(&ctnet->ecache_dwork, delay);
}
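
/* A hedged sketch of how this work gets kicked off: when destroy-event
 * delivery fails during conntrack teardown, core code schedules
 * ecache_dwork via a helper along the lines of the
 * nf_conntrack_ecache_delayed_work() inline from nf_conntrack_ecache.h,
 * roughly:
 *
 *      if (!delayed_work_pending(&net->ct.ecache_dwork))
 *              schedule_delayed_work(&net->ct.ecache_dwork, HZ);
 *
 * The actual trigger lives outside this file; treat the above as an
 * illustration of the mechanism, not a copy of that code.
 */
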
/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
        struct net *net = nf_ct_net(ct);
        unsigned long events, missed;
        struct nf_ct_event_notifier *notify;
        struct nf_conntrack_ecache *e;
        struct nf_ct_event item;
        int ret;

        rcu_read_lock();
        notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
        if (notify == NULL)
                goto out_unlock;

        e = nf_ct_ecache_find(ct);
        if (e == NULL)
                goto out_unlock;

        events = xchg(&e->cache, 0);

        if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events)
                goto out_unlock;

        /* We make a copy of the missed event cache without taking
         * the lock, thus we may send missed events twice. However,
         * this does not harm and it happens very rarely. */
        missed = e->missed;

        if (!((events | missed) & e->ctmask))
                goto out_unlock;

        item.ct = ct;
        item.portid = 0;
        item.report = 0;

        ret = notify->fcn(events | missed, &item);

        if (likely(ret >= 0 && !missed))
                goto out_unlock;

        spin_lock_bh(&ct->lock);
        if (ret < 0)
                e->missed |= events;
        else
                e->missed &= ~missed;
        spin_unlock_bh(&ct->lock);

out_unlock:
        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
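
/* A hedged usage sketch: producers record events into the per-conntrack
 * cache and the cache is flushed later, typically at confirmation time.
 * Illustrative only; the real call sites are in nf_conntrack_core.c and
 * the netfilter hooks:
 *
 *      nf_conntrack_event_cache(IPCT_REPLY, ct);
 *      ...
 *      nf_ct_deliver_cached_events(ct);   // softirqs disabled here
 *
 * nf_conntrack_event_cache() is the inline from nf_conntrack_ecache.h
 * that ORs the event bit into e->cache for later delivery.
 */
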
int nf_conntrack_register_notifier(struct net *net,
                                   struct nf_ct_event_notifier *new)
{
        int ret;
        struct nf_ct_event_notifier *notify;

        mutex_lock(&nf_ct_ecache_mutex);
        notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
                                           lockdep_is_held(&nf_ct_ecache_mutex));
        if (notify != NULL) {
                ret = -EBUSY;
                goto out_unlock;
        }
        rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
        ret = 0;

out_unlock:
        mutex_unlock(&nf_ct_ecache_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
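
/* A hedged registration sketch, modelled on what ctnetlink does: only
 * one notifier may be registered per netns, so -EBUSY is the expected
 * failure if another user already holds the slot.
 *
 *      static struct nf_ct_event_notifier my_notifier = {
 *              .fcn = my_event_fn,     // hypothetical callback name
 *      };
 *
 *      err = nf_conntrack_register_notifier(net, &my_notifier);
 *      if (err == -EBUSY)
 *              ...  // e.g. ctnetlink already owns the slot
 *
 * The callback signature is int (*fcn)(unsigned int events,
 * struct nf_ct_event *item), matching the notify->fcn() call above.
 */
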
void nf_conntrack_unregister_notifier(struct net *net,
                                      struct nf_ct_event_notifier *new)
{
        struct nf_ct_event_notifier *notify;

        mutex_lock(&nf_ct_ecache_mutex);
        notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
                                           lockdep_is_held(&nf_ct_ecache_mutex));
        BUG_ON(notify != new);
        RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
        mutex_unlock(&nf_ct_ecache_mutex);
        /* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);

int nf_ct_expect_register_notifier(struct net *net,
                                   struct nf_exp_event_notifier *new)
{
        int ret;
        struct nf_exp_event_notifier *notify;

        mutex_lock(&nf_ct_ecache_mutex);
        notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
                                           lockdep_is_held(&nf_ct_ecache_mutex));
        if (notify != NULL) {
                ret = -EBUSY;
                goto out_unlock;
        }
        rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
        ret = 0;

out_unlock:
        mutex_unlock(&nf_ct_ecache_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);

void nf_ct_expect_unregister_notifier(struct net *net,
                                      struct nf_exp_event_notifier *new)
{
        struct nf_exp_event_notifier *notify;

        mutex_lock(&nf_ct_ecache_mutex);
        notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
                                           lockdep_is_held(&nf_ct_ecache_mutex));
        BUG_ON(notify != new);
        RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
        mutex_unlock(&nf_ct_ecache_mutex);
        /* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);

#define NF_CT_EVENTS_DEFAULT 1
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;

#ifdef CONFIG_SYSCTL
static struct ctl_table event_sysctl_table[] = {
        {
                .procname       = "nf_conntrack_events",
                .data           = &init_net.ct.sysctl_events,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {}
};
#endif /* CONFIG_SYSCTL */
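
/* The entry above surfaces as /proc/sys/net/netfilter/nf_conntrack_events
 * (per netns, on sysctl-enabled kernels), so event delivery can be
 * toggled at runtime, e.g.:
 *
 *      echo 0 > /proc/sys/net/netfilter/nf_conntrack_events
 */
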
static struct nf_ct_ext_type event_extend __read_mostly = {
        .len    = sizeof(struct nf_conntrack_ecache),
        .align  = __alignof__(struct nf_conntrack_ecache),
        .id     = NF_CT_EXT_ECACHE,
};
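
/* A hedged sketch of how this extension is attached: when a conntrack
 * is set up, core code adds the ecache area via a helper along the
 * lines of nf_ct_ecache_ext_add() from nf_conntrack_ecache.h, e.g.:
 *
 *      struct nf_conntrack_ecache *e;
 *      e = nf_ct_ecache_ext_add(ct, ctmask, expmask, GFP_ATOMIC);
 *
 * nf_ct_ecache_find(), used above, then retrieves that same extension
 * through nf_ct_ext_find(ct, NF_CT_EXT_ECACHE).
 */
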
#ifdef CONFIG_SYSCTL
static int nf_conntrack_event_init_sysctl(struct net *net)
{
        struct ctl_table *table;

        table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
                        GFP_KERNEL);
        if (!table)
                goto out;

        table[0].data = &net->ct.sysctl_events;

        /* Don't export sysctls to unprivileged users */
        if (net->user_ns != &init_user_ns)
                table[0].procname = NULL;

        net->ct.event_sysctl_header =
                register_net_sysctl(net, "net/netfilter", table);
        if (!net->ct.event_sysctl_header) {
                printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n");
                goto out_register;
        }
        return 0;

out_register:
        kfree(table);
out:
        return -ENOMEM;
}

static void nf_conntrack_event_fini_sysctl(struct net *net)
{
        struct ctl_table *table;

        table = net->ct.event_sysctl_header->ctl_table_arg;
        unregister_net_sysctl_table(net->ct.event_sysctl_header);
        kfree(table);
}
#else
static int nf_conntrack_event_init_sysctl(struct net *net)
{
        return 0;
}

static void nf_conntrack_event_fini_sysctl(struct net *net)
{
}
#endif /* CONFIG_SYSCTL */

int nf_conntrack_ecache_pernet_init(struct net *net)
{
        net->ct.sysctl_events = nf_ct_events;
        INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
        return nf_conntrack_event_init_sysctl(net);
}

void nf_conntrack_ecache_pernet_fini(struct net *net)
{
        cancel_delayed_work_sync(&net->ct.ecache_dwork);
        nf_conntrack_event_fini_sysctl(net);
}

int nf_conntrack_ecache_init(void)
{
        int ret = nf_ct_extend_register(&event_extend);

        if (ret < 0)
                pr_err("nf_ct_event: Unable to register event extension.\n");
        return ret;
}

void nf_conntrack_ecache_fini(void)
{
        nf_ct_extend_unregister(&event_extend);
}